From b1960c1108d9d976c49829927a51edbf4561f746 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Sat, 27 Jun 2026 11:06:24 +0800 Subject: [PATCH 1/7] add document Signed-off-by: kerthcet --- README.md | 5 +- docs/DEVELOP.md | 4 +- docs/{ => proposals}/PROTOCOL.md | 0 docs/proposals/SNAPSHOTS.md | 371 +++++++++++++++++++++++++++++++ docs/{ => proposals}/TUNNEL.md | 0 examples/tunnel-simple/README.md | 4 +- hack/docker/README.md | 2 +- 7 files changed, 379 insertions(+), 7 deletions(-) rename docs/{ => proposals}/PROTOCOL.md (100%) create mode 100644 docs/proposals/SNAPSHOTS.md rename docs/{ => proposals}/TUNNEL.md (100%) diff --git a/README.md b/README.md index d5849eb..02cc6e2 100644 --- a/README.md +++ b/README.md @@ -148,13 +148,14 @@ server = Server(connect="tunnel", tunnel_config=config) # ✓ No public IPs required ``` -See [Tunnel Mode Guide](./docs/TUNNEL.md) for setup instructions. +See [Tunnel Mode Guide](./docs/proposals/TUNNEL.md) for setup instructions. ## Documentation - [Quick Start Guide](./docs/QUICKSTART.md) - [Architecture Details](./docs/ARCHITECTURE.md) -- [Protocol Specification](./docs/PROTOCOL.md) +- [Protocol Specification](./docs/proposals/PROTOCOL.md) +- [Tunnel Mode Guide](./docs/proposals/TUNNEL.md) - [Development Guide](./docs/DEVELOP.md) - [Examples](./examples) diff --git a/docs/DEVELOP.md b/docs/DEVELOP.md index ed47616..e5ab7c2 100644 --- a/docs/DEVELOP.md +++ b/docs/DEVELOP.md @@ -310,7 +310,7 @@ Include motivation and context. WebSocket-based JSON protocol for agent-daemon communication. -For complete protocol specification, see [PROTOCOL.md](PROTOCOL.md). +For complete protocol specification, see [proposals/PROTOCOL.md](proposals/PROTOCOL.md). ## Resources @@ -322,6 +322,6 @@ For complete protocol specification, see [PROTOCOL.md](PROTOCOL.md). ## Questions? 
- Check [ARCHITECTURE.md](ARCHITECTURE.md) for design details -- Check [PROTOCOL.md](PROTOCOL.md) for protocol specification +- Check [proposals/PROTOCOL.md](proposals/PROTOCOL.md) for protocol specification - Check [STATUS.md](STATUS.md) for implementation status - Check [QUICKSTART.md](QUICKSTART.md) for usage examples diff --git a/docs/PROTOCOL.md b/docs/proposals/PROTOCOL.md similarity index 100% rename from docs/PROTOCOL.md rename to docs/proposals/PROTOCOL.md diff --git a/docs/proposals/SNAPSHOTS.md b/docs/proposals/SNAPSHOTS.md new file mode 100644 index 0000000..0602540 --- /dev/null +++ b/docs/proposals/SNAPSHOTS.md @@ -0,0 +1,371 @@ +# SandD Filesystem Snapshot System - Design Proposal + +--- + +## Executive Summary + +This document describes a **Git-inspired content-addressable snapshot system** for SandD daemon workspaces. The system provides: + +- **Filesystem versioning**: Create point-in-time snapshots of daemon workspaces +- **Space-efficient storage**: Automatic deduplication via content addressing +- **Cross-platform compatibility**: Works on Linux, macOS, Windows without special privileges +- **Future extensibility**: Designed to support fast kernel-based backends when root access is available + +**Key Design Decision:** Start with a pure userspace implementation (like Git) that works everywhere, with hooks for privileged optimizations later. + +--- + +## Background & Motivation + +### Problem Statement + +SandD agents need to: +1. **Checkpoint workspace state** before/after tasks +2. **Resume from previous states** if tasks fail +3. **Rollback to known-good states** +4. 
**Share common base workspaces** across multiple daemons efficiently + +### Constraints + +- **Unknown privilege level**: May run in unprivileged containers or as root on bare metal +- **Cross-platform**: Must support Linux, macOS, Windows +- **Unknown deployment**: Container, VM, bare metal, cloud, on-premise +- **Variable workload**: From small config files to large codebases + +### Related Documentation + +This design builds on existing SandD architecture: + +- **[Protocol Specification](PROTOCOL.md)** - WebSocket message format for daemon communication +- **[Tunnel Mode](TUNNEL.md)** - Secure networking with WireGuard/Tailscale +- **[Architecture Details](../ARCHITECTURE.md)** - Overall system design + +**Snapshot integration points:** +- Protocol: New message types for snapshot operations +- Tunnel: Snapshots are local; future work may support remote snapshot transfer + +--- + +## Goals & Non-Goals + +### Goals + +**Primary Goals:** +- ✅ Snapshot daemon workspaces in seconds (not milliseconds) +- ✅ Restore to any previous snapshot +- ✅ Deduplicate identical files across snapshots +- ✅ Work on any platform without special privileges +- ✅ Preserve file metadata (permissions, timestamps, symlinks) + +**Secondary Goals:** +- ✅ Tag and describe snapshots +- ✅ List and query snapshots +- ✅ Garbage collect unreferenced objects +- ✅ Compress large files (optional) +- ✅ Support incremental snapshots (parent tracking) + +### Non-Goals + +**Explicitly Out of Scope:** +- ❌ Block-level deduplication (file-level only in v1) +- ❌ Branching and merging (Git-like branches not needed) +- ❌ Network synchronization (local only) +- ❌ Encryption (store plaintext objects) +- ❌ Real-time filesystem overlay during execution +- ❌ Process state preservation (filesystem only, not memory/CPU) + +**Future Work:** +- ⏳ Chunked deduplication for large files (v2) +- ⏳ Kernel-based backend when root available (v2) +- ⏳ Remote snapshot storage (v3) + +--- + +## Architecture Overview + +### 
High-Level Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ SandD Daemon │ +│ ┌────────────────────────────────────────────────────┐ │ +│ │ Snapshot Manager (Public API) │ │ +│ └────────────────┬───────────────────────────────────┘ │ +│ │ │ +│ ┌────────────────┴───────────────────────────────────┐ │ +│ │ Object Store (CAS) │ │ +│ │ - Store blobs by content hash │ │ +│ │ - Retrieve blobs by hash │ │ +│ │ - Automatic deduplication │ │ +│ └────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ + ↓ + ┌────────────────────────────────┐ + │ Filesystem Storage │ + │ .snapshots/ │ + │ ├── objects/ │ + │ │ ├── ab/ │ + │ │ │ └── cdef123... │ + │ │ └── 12/ │ + │ │ └── 3456... │ + │ ├── snapshots/ │ + │ │ ├── snap-uuid-1.json │ + │ │ └── snap-uuid-2.json │ + │ └── HEAD │ + └────────────────────────────────┘ +``` + +**Note:** `ab/` and `12/` are subdirectories named after the first 2 characters of content hashes. This is explained in detail below. + +### Storage Model (Git-Inspired) + +**Content-Addressable Storage:** +- Files stored by BLAKE3 hash (64 hex characters, e.g., `abc123def456...`) +- Automatic deduplication (same content = same hash = stored once) +- Immutable objects (never modified after creation) + +**Hash-Based Directory Sharding:** + +To keep directories fast (many filesystems slow down with >10k files per directory), we split objects into subdirectories based on the **first 2 characters** of their hash: + +``` +Hash: abc123def456789... (64 chars) + ↑↑ ↑↑↑↑↑↑↑↑↑↑↑↑↑ + │ └─ Filename + └─ Subdirectory name + +Stored as: objects/ab/c123def456789... 
+ ↑↑ ↑↑↑↑↑↑↑↑↑↑↑↑↑ + │ └─ Rest of hash (62 chars) + └─ First 2 chars (256 possible: 00-ff) +``` + +**Why this works:** +- BLAKE3 hashes are uniformly distributed (cryptographic property) +- First 2 hex chars = 256 possible subdirectories (16² = 00, 01, ..., fe, ff) +- 10,000 objects = ~39 objects per subdirectory (10000/256) +- Industry standard pattern (used by Git, Docker, IPFS) + +**Example:** + +``` +File: main.rs +Content: "fn main() {}" +Hash: ab7c3ef21a9b4d5e6f8a1c2d3e4f5a6b... +Stored at: objects/ab/7c3ef21a9b4d5e6f8a1c2d3e4f5a6b... + ↑↑ ↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ + │ Remaining 62 characters + First 2 characters +``` + +**Tree Structure:** +``` +workspace/ +├── src/ +│ ├── main.rs → Hash: ab7c3ef2... +│ └── lib.rs → Hash: cd8e9f1a... +└── Cargo.toml → Hash: 12a4b6c8... + +Becomes: + +objects/ +├── ab/ +│ └── 7c3ef2... ← Blob: main.rs content +├── cd/ +│ └── 8e9f1a... ← Blob: lib.rs content +├── 12/ +│ └── a4b6c8... ← Blob: Cargo.toml content +├── ef/ +│ └── aabbcc... ← Tree: src/ directory structure (JSON) +└── 99/ + └── 887766... ← Tree: root directory structure (JSON) + +snapshots/snap-uuid.json → points to root tree (998877...) 
+``` +--- + +## Core API + +```rust +pub struct SnapshotManager { + store: ObjectStore, + snapshots_dir: PathBuf, +} + +impl SnapshotManager { + /// Initialize snapshot manager + pub fn new(root: PathBuf) -> Result<Self>; + + /// Create a snapshot of workspace + pub async fn create_snapshot( + &self, + workspace: &Path, + message: String, + tags: Vec<String>, + ) -> Result<String>; // Returns snapshot ID + + /// Restore snapshot to destination + pub async fn restore_snapshot( + &self, + snapshot_id: &str, + destination: &Path, + ) -> Result<()>; + + /// List all snapshots + pub async fn list_snapshots(&self) -> Result>; + + /// Delete snapshot and orphaned objects + pub async fn delete_snapshot(&self, id: &str) -> Result<()>; + + /// Garbage collect unreferenced objects + pub async fn gc(&self) -> Result; +} +``` + +--- + +## Protocol Integration + +**Note:** See [Protocol Specification](PROTOCOL.md) for complete message format details. + +**New message types:** + +```rust +pub enum Request { + CreateSnapshot { + daemon_id: String, + workspace_path: String, + message: String, + tags: Vec<String>, + }, + + RestoreSnapshot { + daemon_id: String, + snapshot_id: String, + destination: String, + }, + + ListSnapshots { daemon_id: String }, + DeleteSnapshot { daemon_id: String, snapshot_id: String }, + GarbageCollect { daemon_id: String }, +} + +pub enum Response { + SnapshotCreated { + snapshot_id: String, + file_count: usize, + total_size: u64, + duration_ms: u64, + }, + + SnapshotRestored { file_count: usize, duration_ms: u64 }, + Snapshots { snapshots: Vec }, + SnapshotDeleted { freed_bytes: u64 }, + GarbageCollected { objects_deleted: usize, bytes_freed: u64 }, +} +``` + +--- + +## Performance Characteristics + +### Snapshot Creation + +| Workspace Size | Files | Size | Snapshot Time | +|----------------|-------|------|---------------| +| Small | 100 | 10MB | ~100ms | +| Medium | 1,000 | 100MB | ~800ms | +| Large | 10,000 | 1GB | ~5s | + +### Storage Efficiency + +**Deduplication example:** +- 
1GB workspace +- 10 snapshots with 10% change rate per snapshot +- **Storage:** ~2GB (vs 10GB for full copies) + +--- + +### Backend Selection Strategy + +```rust +let backend = if can_use_docker() { + DockerBackend::new() // ~50ms snapshots +} else if can_use_overlayfs() { + OverlayfsBackend::new() // ~20ms snapshots +} else { + GitStyleBackend::new() // ~500ms snapshots, works everywhere +}; +``` + +--- + +## Example Usage + +```rust +use sandd::snapshot::SnapshotManager; + +#[tokio::main] +async fn main() -> Result<()> { + let manager = SnapshotManager::new( + PathBuf::from("/var/sandd/snapshots") + )?; + + // Create snapshot + let snapshot_id = manager.create_snapshot( + Path::new("/workspace/agent-123"), + "Before task execution".to_string(), + vec!["pre-task".to_string()], + ).await?; + + println!("Created snapshot: {}", snapshot_id); + + // List snapshots + let snapshots = manager.list_snapshots().await?; + for snap in snapshots { + println!("{}: {} (tags: {:?})", snap.id, snap.message, snap.tags); + } + + // Restore if needed + manager.restore_snapshot( + &snapshot_id, + Path::new("/tmp/restored"), + ).await?; + + Ok(()) +} +``` + +--- + +## Dependencies + +```toml +[dependencies] +blake3 = "1.5" # Fast hashing +walkdir = "2.4" # Directory traversal +uuid = { version = "1.11", features = ["v4", "serde"] } +zstd = { version = "0.13", optional = true } # Compression + +# Already in workspace +tokio = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +anyhow = { workspace = true } +tracing = { workspace = true } +``` + +--- + +## Alternatives Considered + +| Alternative | Why Not? 
| +|-------------|----------| +| Docker volumes | Requires Docker, container-only | +| BTRFS/ZFS | Requires specific filesystem + root | +| overlayfs | Requires root, Linux only | +| fuse-overlayfs | 3-4x I/O overhead, requires /dev/fuse | +| rsync | No built-in versioning, manual management | + +**Decision:** Git model is proven, cross-platform, and works everywhere. diff --git a/docs/TUNNEL.md b/docs/proposals/TUNNEL.md similarity index 100% rename from docs/TUNNEL.md rename to docs/proposals/TUNNEL.md diff --git a/examples/tunnel-simple/README.md b/examples/tunnel-simple/README.md index 7dbb947..3f965ff 100644 --- a/examples/tunnel-simple/README.md +++ b/examples/tunnel-simple/README.md @@ -1,6 +1,6 @@ # Tunnel Mode Example -⚠️ **For Development/Testing Only** - See [production guide](../../docs/TUNNEL.md) for real deployments. +⚠️ **For Development/Testing Only** - See [production guide](../../docs/proposals/TUNNEL.md) for real deployments. This example demonstrates tunnel mode setup with Headscale. It shows how to run the controller; you'll launch daemons separately. @@ -280,6 +280,6 @@ docker logs tunnel-simple-headscale-1 ## Next Steps -- [Full Tunnel Guide](../../docs/TUNNEL.md) +- [Full Tunnel Guide](../../docs/proposals/TUNNEL.md) - [Kubernetes Deployment](../../docs/deployment/kubernetes.md) (coming soon) - [Production Best Practices](../../docs/deployment/production.md) (coming soon) diff --git a/hack/docker/README.md b/hack/docker/README.md index 1d48fd3..df28796 100644 --- a/hack/docker/README.md +++ b/hack/docker/README.md @@ -11,7 +11,7 @@ This directory contains Docker-related files for building and testing SandD. 
- **`Dockerfile.tunnel`** - Server with Tailscale (build from source) - Use: Development and testing - Build: `docker build -f hack/docker/Dockerfile.tunnel -t inftyai/sandd-server:latest-tunnel .` - - See: [docs/TUNNEL.md](../../docs/TUNNEL.md) + - See: [docs/proposals/TUNNEL.md](../../docs/proposals/TUNNEL.md) - **`Dockerfile.tunnel-release`** - Server with Tailscale (uses PyPI release) - Use: Production deployments From 841629edf3ca85935593ef3b458f63c218b75f29 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Sat, 27 Jun 2026 11:10:39 +0800 Subject: [PATCH 2/7] remove sandd_ commands Signed-off-by: kerthcet --- sandd/src/main.rs | 78 +++++++++++++---------------------------------- 1 file changed, 21 insertions(+), 57 deletions(-) diff --git a/sandd/src/main.rs b/sandd/src/main.rs index 91e0e31..9805586 100644 --- a/sandd/src/main.rs +++ b/sandd/src/main.rs @@ -264,59 +264,29 @@ where env, cwd, } => { - // Check for in-tree commands (sandd_* prefix) - if let Some(intree_cmd) = command.strip_prefix("sandd_") { - debug!("Handling in-tree command: {}", intree_cmd); + // Execute command directly via shell + debug!("Executing command: {}", command); + let result = executor.execute(&command, timeout_secs, env, cwd).await; - let start = std::time::Instant::now(); - let result = handle_intree_command(intree_cmd).await; - let duration_ms = start.elapsed().as_millis() as u64; + let response = match result { + Ok(output) => Message::CommandOutput { + request_id, + stdout: output.stdout, + stderr: output.stderr, + exit_code: output.exit_code, + duration_ms: output.duration_ms, + }, + Err(e) => Message::CommandError { + request_id, + error: e.to_string(), + }, + }; - let response = match result { - Ok(output) => Message::CommandOutput { - request_id, - stdout: output, - stderr: String::new(), - exit_code: 0, - duration_ms, - }, - Err(e) => Message::CommandOutput { - request_id, - stdout: String::new(), - stderr: format!("In-tree command error: {}", e), - exit_code: 1, - 
duration_ms, - }, - }; - - let json = serde_json::to_string(&response)?; - let mut tx = ws_tx.lock().await; - tx.send(WsMessage::Text(json)).await? - } else { - // Normal shell execution - debug!("Executing command: {}", command); - let result = executor.execute(&command, timeout_secs, env, cwd).await; - - let response = match result { - Ok(output) => Message::CommandOutput { - request_id, - stdout: output.stdout, - stderr: output.stderr, - exit_code: output.exit_code, - duration_ms: output.duration_ms, - }, - Err(e) => Message::CommandError { - request_id, - error: e.to_string(), - }, - }; - - let json = serde_json::to_string(&response)?; - let mut tx = ws_tx.lock().await; - tx.send(WsMessage::Text(json)) - .await - .map_err(|e| anyhow::anyhow!("{}", e))?; - } + let json = serde_json::to_string(&response)?; + let mut tx = ws_tx.lock().await; + tx.send(WsMessage::Text(json)) + .await + .map_err(|e| anyhow::anyhow!("{}", e))?; } Message::NewSession { @@ -447,12 +417,6 @@ where Ok(()) } -async fn handle_intree_command(cmd: &str) -> Result { - match cmd { - _ => Err(anyhow::anyhow!("Unknown in-tree command: {}", cmd)), - } -} - async fn setup_tunnel(args: &Args) -> Result<()> { use std::process::Command; From 507fe14c90d425529b398efbe8dc43f8ad99d83b Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 29 Jun 2026 16:03:38 +0100 Subject: [PATCH 3/7] feat: add snapshot/restore Signed-off-by: kerthcet --- Cargo.lock | 323 +++++++++++++- Makefile | 12 + docs/proposals/SNAPSHOTS.md | 137 +----- examples/snapshot_real_project.rs | 219 +++++++++ examples/snapshot_simple.rs | 97 ++++ sandd/Cargo.toml | 26 ++ sandd/benches/snapshot_bench.rs | 376 ++++++++++++++++ sandd/src/lib.rs | 4 + sandd/src/main.rs | 1 + sandd/src/snapshot/manager.rs | 694 +++++++++++++++++++++++++++++ sandd/src/snapshot/mod.rs | 8 + sandd/src/snapshot/object_store.rs | 216 +++++++++ sandd/src/snapshot/tree.rs | 81 ++++ sandd/src/snapshot/types.rs | 40 ++ 14 files changed, 2112 insertions(+), 122 
deletions(-) create mode 100644 examples/snapshot_real_project.rs create mode 100644 examples/snapshot_simple.rs create mode 100644 sandd/benches/snapshot_bench.rs create mode 100644 sandd/src/lib.rs create mode 100644 sandd/src/snapshot/manager.rs create mode 100644 sandd/src/snapshot/mod.rs create mode 100644 sandd/src/snapshot/object_store.rs create mode 100644 sandd/src/snapshot/tree.rs create mode 100644 sandd/src/snapshot/types.rs diff --git a/Cargo.lock b/Cargo.lock index d0178f2..eedd2f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "1.0.0" @@ -67,6 +82,18 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f02882884d3e1bc524fb12c79f107f6ad0e1cfd498c536ffb494301740995dfe" + [[package]] name = "async-trait" version = "0.1.89" @@ -166,6 +193,20 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "blake3" +version = "1.8.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if 1.0.4", + "constant_time_eq", + "cpufeatures 0.3.0", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -203,6 +244,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.63" @@ -225,6 +272,47 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chrono" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.6.1" @@ -280,6 
+368,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "core-foundation" version = "0.10.1" @@ -305,6 +399,53 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "futures 0.3.32", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -360,6 +501,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -445,6 +592,16 @@ dependencies = [ 
"winapi 0.3.9", ] +[[package]] +name = "filetime" +version = "0.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759" +dependencies = [ + "cfg-if 1.0.4", + "libc", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -631,6 +788,17 @@ dependencies = [ "wasip3", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if 1.0.4", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -750,6 +918,30 @@ dependencies = [ "tower-service", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "id-arena" version = "2.3.0" @@ -795,12 +987,32 @@ dependencies = [ "libc", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.18" @@ -1062,6 +1274,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.17.0" @@ -1084,6 +1305,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "openssl" version = "0.10.80" @@ -1200,6 +1427,34 @@ version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -1399,6 
+1654,18 @@ dependencies = [ "bitflags 2.11.1", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.14" @@ -1450,6 +1717,15 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "sandbox-server" version = "0.0.0" @@ -1479,12 +1755,17 @@ version = "0.0.0" dependencies = [ "anyhow", "base64", + "blake3", + "chrono", "clap", + "criterion", + "filetime", "futures-util", "portable-pty", "serde", "serde_json", "sysinfo", + "tempfile", "tokio", "tokio-process", "tokio-tungstenite", @@ -1492,6 +1773,7 @@ dependencies = [ "tracing", "tracing-subscriber", "uuid", + "walkdir", ] [[package]] @@ -1668,7 +1950,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if 1.0.4", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -1838,6 +2120,16 @@ dependencies = [ "cfg-if 1.0.4", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tokio" version = "1.52.3" @@ -2192,6 +2484,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2295,6 +2597,16 @@ dependencies = [ "semver 1.0.28", ] +[[package]] +name = "web-sys" +version = "0.3.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.2.8" @@ -2323,6 +2635,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Makefile b/Makefile index 32bbea6..71a583d 100644 --- a/Makefile +++ b/Makefile @@ -130,3 +130,15 @@ publish-pypi: $(MATURIN) build-wheels publish-crate: @echo "Publishing sandd daemon to crates.io..." cargo publish --package sandd + +.PHONY: benchmark +benchmark: $(MATURIN) + @echo "Running benchmarks for sandd daemon..." 
+ @echo "" + cargo bench --package sandd + +.PHONY: benchmark-results +benchmark-results: + @echo "Benchmark results for sandd daemon:" + @echo "" + open target/criterion/report/index.html diff --git a/docs/proposals/SNAPSHOTS.md b/docs/proposals/SNAPSHOTS.md index 0602540..d58ccb0 100644 --- a/docs/proposals/SNAPSHOTS.md +++ b/docs/proposals/SNAPSHOTS.md @@ -1,83 +1,29 @@ -# SandD Filesystem Snapshot System - Design Proposal +# SandD Snapshot System ---- - -## Executive Summary - -This document describes a **Git-inspired content-addressable snapshot system** for SandD daemon workspaces. The system provides: - -- **Filesystem versioning**: Create point-in-time snapshots of daemon workspaces -- **Space-efficient storage**: Automatic deduplication via content addressing -- **Cross-platform compatibility**: Works on Linux, macOS, Windows without special privileges -- **Future extensibility**: Designed to support fast kernel-based backends when root access is available - -**Key Design Decision:** Start with a pure userspace implementation (like Git) that works everywhere, with hooks for privileged optimizations later. - ---- - -## Background & Motivation +## Overview -### Problem Statement +A Git-inspired snapshot system for capturing and restoring workspace state in agent sandboxes. This is a **pure snapshot system** (not version control) - focused on state capture/restore rather than tracking changes over time. -SandD agents need to: -1. **Checkpoint workspace state** before/after tasks -2. **Resume from previous states** if tasks fail -3. **Rollback to known-good states** -4. 
**Share common base workspaces** across multiple daemons efficiently +## Key Features -### Constraints - -- **Unknown privilege level**: May run in unprivileged containers or as root on bare metal -- **Cross-platform**: Must support Linux, macOS, Windows -- **Unknown deployment**: Container, VM, bare metal, cloud, on-premise -- **Variable workload**: From small config files to large codebases - -### Related Documentation - -This design builds on existing SandD architecture: - -- **[Protocol Specification](PROTOCOL.md)** - WebSocket message format for daemon communication -- **[Tunnel Mode](TUNNEL.md)** - Secure networking with WireGuard/Tailscale -- **[Architecture Details](../ARCHITECTURE.md)** - Overall system design - -**Snapshot integration points:** -- Protocol: New message types for snapshot operations -- Tunnel: Snapshots are local; future work may support remote snapshot transfer +- **Hierarchical trees**: Efficient for large projects (100k+ files) +- **Content-addressable storage**: Automatic deduplication via BLAKE3 hashing +- **Cross-platform**: Works on Linux, macOS, Windows without special privileges +- **Tag-based filtering**: Organize snapshots with multiple tags +- **Independent snapshots**: No parent chains, each snapshot stands alone --- -## Goals & Non-Goals - -### Goals - -**Primary Goals:** -- ✅ Snapshot daemon workspaces in seconds (not milliseconds) -- ✅ Restore to any previous snapshot -- ✅ Deduplicate identical files across snapshots -- ✅ Work on any platform without special privileges -- ✅ Preserve file metadata (permissions, timestamps, symlinks) - -**Secondary Goals:** -- ✅ Tag and describe snapshots -- ✅ List and query snapshots -- ✅ Garbage collect unreferenced objects -- ✅ Compress large files (optional) -- ✅ Support incremental snapshots (parent tracking) +## Similar Systems -### Non-Goals +This design takes inspiration from: -**Explicitly Out of Scope:** -- ❌ Block-level deduplication (file-level only in v1) -- ❌ Branching and 
merging (Git-like branches not needed) -- ❌ Network synchronization (local only) -- ❌ Encryption (store plaintext objects) -- ❌ Real-time filesystem overlay during execution -- ❌ Process state preservation (filesystem only, not memory/CPU) +- **VM Snapshots** (VMware/VirtualBox): State capture/restore +- **ZFS/Btrfs Snapshots**: Filesystem-level snapshots +- **Docker Layers**: Image layers with content addressing +- **Time Machine**: Point-in-time backups -**Future Work:** -- ⏳ Chunked deduplication for large files (v2) -- ⏳ Kernel-based backend when root available (v2) -- ⏳ Remote snapshot storage (v3) +We use Git's storage model (hierarchical trees, content-addressable) but with snapshot semantics (no version control features). --- @@ -268,38 +214,6 @@ pub enum Response { --- -## Performance Characteristics - -### Snapshot Creation - -| Workspace Size | Files | Size | Snapshot Time | -|----------------|-------|------|---------------| -| Small | 100 | 10MB | ~100ms | -| Medium | 1,000 | 100MB | ~800ms | -| Large | 10,000 | 1GB | ~5s | - -### Storage Efficiency - -**Deduplication example:** -- 1GB workspace -- 10 snapshots with 10% change rate per snapshot -- **Storage:** ~2GB (vs 10GB for full copies) - ---- - -### Backend Selection Strategy - -```rust -let backend = if can_use_docker() { - DockerBackend::new() // ~50ms snapshots -} else if can_use_overlayfs() { - OverlayfsBackend::new() // ~20ms snapshots -} else { - GitStyleBackend::new() // ~500ms snapshots, works everywhere -}; -``` - ---- ## Example Usage @@ -339,25 +253,6 @@ async fn main() -> Result<()> { --- -## Dependencies - -```toml -[dependencies] -blake3 = "1.5" # Fast hashing -walkdir = "2.4" # Directory traversal -uuid = { version = "1.11", features = ["v4", "serde"] } -zstd = { version = "0.13", optional = true } # Compression - -# Already in workspace -tokio = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -anyhow = { workspace = true } -tracing = { workspace 
= true } -``` - ---- - ## Alternatives Considered | Alternative | Why Not? | diff --git a/examples/snapshot_real_project.rs b/examples/snapshot_real_project.rs new file mode 100644 index 0000000..938ab3c --- /dev/null +++ b/examples/snapshot_real_project.rs @@ -0,0 +1,219 @@ +use anyhow::Result; +use sandd::snapshot::SnapshotManager; +use std::time::Instant; +use tempfile::TempDir; + +#[tokio::main] +async fn main() -> Result<()> { + println!("Real Project Snapshot Test\n"); + + let temp_dir = TempDir::new()?; + let snapshot_store = temp_dir.path().join("snapshots"); + + // Use command line argument or default to kubernetes + let repo_url = std::env::args() + .nth(1) + .unwrap_or_else(|| "https://github.com/kubernetes/kubernetes".to_string()); + + let workspace = temp_dir.path().join("project"); + + println!("Cloning repository: {}", repo_url); + let clone_start = Instant::now(); + + let output = std::process::Command::new("git") + .args(&[ + "clone", + "--depth", + "1", + &repo_url, + workspace.to_str().unwrap(), + ]) + .output()?; + + if !output.status.success() { + eprintln!( + "Git clone failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + return Err(anyhow::anyhow!("Failed to clone repository")); + } + + let clone_elapsed = clone_start.elapsed(); + println!("✅ Clone complete: {:.2}s\n", clone_elapsed.as_secs_f64()); + + // Count files + let file_count = walkdir::WalkDir::new(&workspace) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + .count(); + + println!("Project statistics:"); + println!(" Files: {}", file_count); + + // Create first snapshot + let manager = SnapshotManager::new(snapshot_store.clone())?; + + println!("\nCreating first snapshot..."); + let snap1_start = Instant::now(); + let snapshot1_id = manager + .create_snapshot( + &workspace, + Some(format!("Initial snapshot of {}", repo_url)), + Some(vec!["initial".to_string()]), + ) + .await?; + let snap1_elapsed = snap1_start.elapsed(); + + println!("✅ 
Snapshot 1 created: {}", snapshot1_id); + println!(" Time: {:.2}s", snap1_elapsed.as_secs_f64()); + println!( + " Throughput: {:.0} files/sec\n", + file_count as f64 / snap1_elapsed.as_secs_f64() + ); + + // Modify workspace - add/change a few files + println!("Modifying workspace..."); + tokio::fs::write(workspace.join("NEW_FILE.txt"), "This is a new file\n").await?; + tokio::fs::write(workspace.join("MODIFIED.txt"), "Modified content\n").await?; + + // Find and modify an existing file (if README exists) + if workspace.join("README.md").exists() { + let readme = tokio::fs::read_to_string(workspace.join("README.md")).await?; + tokio::fs::write(workspace.join("README.md"), format!("{}\n\n# Modified", readme)).await?; + println!(" Modified README.md"); + } + println!(" Added NEW_FILE.txt and MODIFIED.txt\n"); + + // Create second snapshot + println!("Creating second snapshot..."); + let snap2_start = Instant::now(); + let snapshot2_id = manager + .create_snapshot( + &workspace, + Some(format!("Modified snapshot of {}", repo_url)), + Some(vec!["modified".to_string()]), + ) + .await?; + let snap2_elapsed = snap2_start.elapsed(); + + println!("✅ Snapshot 2 created: {}", snapshot2_id); + println!(" Time: {:.2}s", snap2_elapsed.as_secs_f64()); + + let snapshot2 = manager.get_snapshot(&snapshot2_id).await?; + println!( + " Throughput: {:.0} files/sec", + snapshot2.file_count as f64 / snap2_elapsed.as_secs_f64() + ); + println!( + " Speedup vs first snapshot: {:.2}x\n", + snap1_elapsed.as_secs_f64() / snap2_elapsed.as_secs_f64() + ); + + // Get snapshot details + let snapshot1 = manager.get_snapshot(&snapshot1_id).await?; + println!("Snapshot 1 details:"); + println!(" Files: {}", snapshot1.file_count); + println!( + " Total size: {} bytes ({:.2} MB)", + snapshot1.total_size, + snapshot1.total_size as f64 / 1_048_576.0 + ); + + println!("\nSnapshot 2 details:"); + println!(" Files: {}", snapshot2.file_count); + println!( + " Total size: {} bytes ({:.2} MB)", + 
snapshot2.total_size, + snapshot2.total_size as f64 / 1_048_576.0 + ); + + // Check storage efficiency (deduplication) + let objects_dir = snapshot_store.join("objects"); + let mut object_count = 0; + let mut object_size = 0u64; + + for entry in walkdir::WalkDir::new(&objects_dir) + .into_iter() + .filter_map(|e| e.ok()) + { + if entry.file_type().is_file() { + object_count += 1; + if let Ok(metadata) = entry.metadata() { + object_size += metadata.len(); + } + } + } + + println!("\nStorage statistics:"); + println!(" Objects stored: {}", object_count); + println!( + " Storage size: {} bytes ({:.2} MB)", + object_size, + object_size as f64 / 1_048_576.0 + ); + println!( + " Deduplication ratio: {:.2}x (both snapshots share storage)", + (snapshot1.total_size + snapshot2.total_size) as f64 / object_size as f64 + ); + + // Restore first snapshot + println!("\nRestoring snapshot 1..."); + let restore1_dir = temp_dir.path().join("restored1"); + let restore1_start = Instant::now(); + manager.restore_snapshot(&snapshot1_id, &restore1_dir).await?; + let restore1_elapsed = restore1_start.elapsed(); + + println!("✅ Restore 1 complete: {:.2}s", restore1_elapsed.as_secs_f64()); + println!( + " Throughput: {:.0} files/sec", + snapshot1.file_count as f64 / restore1_elapsed.as_secs_f64() + ); + + // Verify restored1 doesn't have new files + assert!(!restore1_dir.join("NEW_FILE.txt").exists()); + println!(" ✓ Verified: no NEW_FILE.txt in snapshot 1"); + + // Restore second snapshot + println!("\nRestoring snapshot 2..."); + let restore2_dir = temp_dir.path().join("restored2"); + let restore2_start = Instant::now(); + manager.restore_snapshot(&snapshot2_id, &restore2_dir).await?; + let restore2_elapsed = restore2_start.elapsed(); + + println!("✅ Restore 2 complete: {:.2}s", restore2_elapsed.as_secs_f64()); + println!( + " Throughput: {:.0} files/sec", + snapshot2.file_count as f64 / restore2_elapsed.as_secs_f64() + ); + + // Verify restored2 has new files + 
assert!(restore2_dir.join("NEW_FILE.txt").exists()); + println!(" ✓ Verified: NEW_FILE.txt exists in snapshot 2\n"); + + // Performance summary + println!("Performance Summary:"); + println!( + " Snapshot 1: {:.2}s ({:.0} files/sec)", + snap1_elapsed.as_secs_f64(), + snapshot1.file_count as f64 / snap1_elapsed.as_secs_f64() + ); + println!( + " Snapshot 2: {:.2}s ({:.0} files/sec) - {:.2}x faster due to deduplication", + snap2_elapsed.as_secs_f64(), + snapshot2.file_count as f64 / snap2_elapsed.as_secs_f64(), + snap1_elapsed.as_secs_f64() / snap2_elapsed.as_secs_f64() + ); + println!( + " Restore 1: {:.2}s ({:.0} files/sec)", + restore1_elapsed.as_secs_f64(), + snapshot1.file_count as f64 / restore1_elapsed.as_secs_f64() + ); + println!( + " Restore 2: {:.2}s ({:.0} files/sec)", + restore2_elapsed.as_secs_f64(), + snapshot2.file_count as f64 / restore2_elapsed.as_secs_f64() + ); + + Ok(()) +} diff --git a/examples/snapshot_simple.rs b/examples/snapshot_simple.rs new file mode 100644 index 0000000..bc0ba9b --- /dev/null +++ b/examples/snapshot_simple.rs @@ -0,0 +1,97 @@ +use anyhow::Result; +use sandd::snapshot::SnapshotManager; +use tempfile::TempDir; +use tokio::fs; + +#[tokio::main] +async fn main() -> Result<()> { + println!("SandD Snapshot System Demo\n"); + + // Setup: create temporary workspace and snapshot storage + let temp_dir = TempDir::new()?; + let workspace = temp_dir.path().join("workspace"); + let snapshot_store = temp_dir.path().join("snapshots"); + + fs::create_dir_all(&workspace).await?; + + let manager = SnapshotManager::new(snapshot_store)?; + + // 1. Create initial workspace state + println!("1. Creating initial workspace..."); + fs::write(workspace.join("README.md"), "# My Project\n").await?; + fs::create_dir_all(workspace.join("src")).await?; + fs::write(workspace.join("src/main.rs"), "fn main() {}\n").await?; + fs::write(workspace.join("src/lib.rs"), "pub fn hello() {}\n").await?; + + // 2. Create snapshot with tags + println!("2. 
Creating snapshot 'init'..."); + let snap1_id = manager + .create_snapshot( + &workspace, + Some("Initial project setup".to_string()), + Some(vec!["init".to_string(), "stable".to_string()]), + ) + .await?; + println!(" Created: {}", snap1_id); + + // 3. Modify workspace + println!("\n3. Modifying workspace..."); + fs::write(workspace.join("src/main.rs"), "fn main() {\n println!(\"Hello!\");\n}\n").await?; + fs::write(workspace.join("Cargo.toml"), "[package]\nname = \"demo\"\n").await?; + + // 4. Create another snapshot + println!("4. Creating snapshot 'feature-work'..."); + let snap2_id = manager + .create_snapshot( + &workspace, + Some("Added hello world".to_string()), + Some(vec!["feature".to_string()]), + ) + .await?; + println!(" Created: {}", snap2_id); + + // 5. List all snapshots + println!("\n5. Listing all snapshots:"); + let all_snapshots = manager.list_snapshots(None).await?; + for snap in &all_snapshots { + println!(" {} - {} (tags: {:?})", snap.id, snap.message, snap.tags); + println!(" Files: {}, Size: {} bytes", snap.file_count, snap.total_size); + } + + // 6. Filter by tag + println!("\n6. Finding snapshots with 'init' tag:"); + let init_snapshots = manager.find_by_tag("init").await?; + for snap in &init_snapshots { + println!(" {} - {}", snap.id, snap.message); + } + + println!("\n7. Finding snapshots with 'feature' tag:"); + let feature_snapshots = manager.find_by_tag("feature").await?; + for snap in &feature_snapshots { + println!(" {} - {}", snap.id, snap.message); + } + + // 8. Restore first snapshot to new location + println!("\n8. 
Restoring '{}' to new location...", snap1_id); + let restore_dir = temp_dir.path().join("restored"); + manager.restore_snapshot(&snap1_id, &restore_dir).await?; + + // Verify restored files + let readme = fs::read_to_string(restore_dir.join("README.md")).await?; + let main_rs = fs::read_to_string(restore_dir.join("src/main.rs")).await?; + println!(" Restored README.md: {}", readme.trim()); + println!(" Restored src/main.rs: {}", main_rs.trim()); + + // 9. Get snapshot details + println!("\n9. Getting snapshot details:"); + let snap = manager.get_snapshot(&snap2_id).await?; + println!(" ID: {}", snap.id); + println!(" Message: {}", snap.message); + println!(" Tags: {:?}", snap.tags); + println!(" Created: {:?}", snap.created_at); + println!(" Files: {}", snap.file_count); + println!(" Total size: {} bytes", snap.total_size); + + println!("\n✅ Demo complete!"); + Ok(()) +} diff --git a/sandd/Cargo.toml b/sandd/Cargo.toml index f134613..66df9ca 100644 --- a/sandd/Cargo.toml +++ b/sandd/Cargo.toml @@ -15,6 +15,18 @@ categories = ["command-line-utilities"] name = "sandd" path = "src/main.rs" +[lib] +name = "sandd" +path = "src/lib.rs" + +[[example]] +name = "snapshot_simple" +path = "../examples/snapshot_simple.rs" + +[[example]] +name = "snapshot_real_project" +path = "../examples/snapshot_real_project.rs" + [dependencies] tokio = { workspace = true } serde = { workspace = true } @@ -45,3 +57,17 @@ tokio-util = { version = "0.7", features = ["io"] } # Base64 for protocol base64 = "0.22" + +# Snapshot system +blake3 = "1.5" +walkdir = "2.4" +chrono = { version = "0.4", features = ["serde"] } +filetime = "0.2" + +[dev-dependencies] +tempfile = "3.8" +criterion = { version = "0.5", features = ["async_tokio"] } + +[[bench]] +name = "snapshot_bench" +harness = false diff --git a/sandd/benches/snapshot_bench.rs b/sandd/benches/snapshot_bench.rs new file mode 100644 index 0000000..914b1c5 --- /dev/null +++ b/sandd/benches/snapshot_bench.rs @@ -0,0 +1,376 @@ +use 
criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use sandd::snapshot::SnapshotManager; +use tempfile::TempDir; +use tokio::runtime::Runtime; + +// Helper to create a workspace with N files +async fn create_test_workspace(dir: &std::path::Path, num_files: usize, file_size: usize) { + tokio::fs::create_dir_all(dir).await.unwrap(); + + for i in 0..num_files { + let file = dir.join(format!("file_{:05}.txt", i)); + let content = vec![b'A' + (i % 26) as u8; file_size]; + tokio::fs::write(&file, content).await.unwrap(); + } +} + +/// Benchmark: Small files (typical source code) +/// +/// **Purpose:** Baseline performance for common use case +/// **Tests:** Many small files (~100 bytes, unique content) +/// **Expected:** Shows syscall overhead dominates (not I/O) +/// **Detects:** Per-file overhead bottleneck - typical for codebases +fn bench_small_files(c: &mut Criterion) { + let mut group = c.benchmark_group("small_files"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(10)); + + for num_files in [100, 1000, 5000].iter() { + group.throughput(Throughput::Elements(*num_files as u64)); + + // Benchmark: Snapshot creation + group.bench_with_input( + BenchmarkId::new("snapshot", num_files), + num_files, + |b, &num_files| { + let rt = Runtime::new().unwrap(); + + b.to_async(&rt).iter(|| async { + let temp_dir = TempDir::new().unwrap(); + let workspace = temp_dir.path().join("workspace"); + let store_dir = temp_dir.path().join("store"); + + create_test_workspace(&workspace, num_files, 100).await; + + let manager = SnapshotManager::new(store_dir).unwrap(); + manager + .create_snapshot(&workspace, None, None) + .await + .unwrap(); + + black_box(()) + }); + }, + ); + + // Benchmark: Restore + group.bench_with_input( + BenchmarkId::new("restore", num_files), + num_files, + |b, &num_files| { + let rt = Runtime::new().unwrap(); + + // Setup: create snapshot once + let temp_dir = TempDir::new().unwrap(); + 
let workspace = temp_dir.path().join("workspace"); + let store_dir = temp_dir.path().join("store"); + + rt.block_on(async { + create_test_workspace(&workspace, num_files, 100).await; + }); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = rt.block_on(async { + manager + .create_snapshot(&workspace, None, None) + .await + .unwrap() + }); + + // Benchmark: restore only + b.to_async(&rt).iter(|| async { + let restore_dir = temp_dir + .path() + .join(format!("restore_{}", uuid::Uuid::new_v4())); + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + black_box(()) + }); + }, + ); + } + + group.finish(); +} + +/// Benchmark: Large files +/// +/// **Purpose:** I/O throughput measurement +/// **Tests:** Binary/media files (1MB, 10MB, 100MB) +/// **Expected:** Should show MB/sec throughput (I/O bound) +/// **Detects:** Buffer size issues, streaming efficiency +fn bench_large_files(c: &mut Criterion) { + let mut group = c.benchmark_group("large_files"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(10)); + + // Files of different sizes: 1MB, 10MB, 100MB + for file_size in [1024 * 1024, 10 * 1024 * 1024, 100 * 1024 * 1024].iter() { + let size_mb = file_size / (1024 * 1024); + group.throughput(Throughput::Bytes(*file_size as u64)); + + // Benchmark: Snapshot creation + group.bench_with_input( + BenchmarkId::new("snapshot", format!("{}MB", size_mb)), + file_size, + |b, &file_size| { + let rt = Runtime::new().unwrap(); + + b.to_async(&rt).iter(|| async { + let temp_dir = TempDir::new().unwrap(); + let workspace = temp_dir.path().join("workspace"); + let store_dir = temp_dir.path().join("store"); + + create_test_workspace(&workspace, 1, file_size).await; + + let manager = SnapshotManager::new(store_dir).unwrap(); + manager + .create_snapshot(&workspace, None, None) + .await + .unwrap(); + + black_box(()) + }); + }, + ); + + // Benchmark: Restore + group.bench_with_input( + 
BenchmarkId::new("restore", format!("{}MB", size_mb)), + file_size, + |b, &file_size| { + let rt = Runtime::new().unwrap(); + + // Setup: create snapshot once + let temp_dir = TempDir::new().unwrap(); + let workspace = temp_dir.path().join("workspace"); + let store_dir = temp_dir.path().join("store"); + + rt.block_on(async { + create_test_workspace(&workspace, 1, file_size).await; + }); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = rt.block_on(async { + manager + .create_snapshot(&workspace, None, None) + .await + .unwrap() + }); + + // Benchmark: restore only + b.to_async(&rt).iter(|| async { + let restore_dir = temp_dir + .path() + .join(format!("restore_{}", uuid::Uuid::new_v4())); + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + black_box(()) + }); + }, + ); + } + + group.finish(); +} + +/// Benchmark: Scalability with number of files +/// +/// **Purpose:** Test if performance scales linearly +/// **Tests:** 100 → 50K files +/// **Expected:** Constant files/sec (linear scaling) +/// **Detects:** Non-linear scaling issues +fn bench_file_count_scaling(c: &mut Criterion) { + let mut group = c.benchmark_group("file_count_scaling"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(300)); + + // Test scalability: 100, 500, 1K, 5K, 10K, 50K, 100K files + for num_files in [100, 500, 1000, 5000, 10000, 50000, 100000].iter() { + group.throughput(Throughput::Elements(*num_files as u64)); + + // Benchmark: Snapshot creation + group.bench_with_input( + BenchmarkId::new("snapshot", format!("{}files", num_files)), + num_files, + |b, &num_files| { + let rt = Runtime::new().unwrap(); + + b.to_async(&rt).iter(|| async { + let temp_dir = TempDir::new().unwrap(); + let workspace = temp_dir.path().join("workspace"); + let store_dir = temp_dir.path().join("store"); + + create_test_workspace(&workspace, num_files, 100).await; + + let manager = SnapshotManager::new(store_dir).unwrap(); 
+ manager + .create_snapshot(&workspace, None, None) + .await + .unwrap(); + + black_box(()) + }); + }, + ); + + // Benchmark: Restore + group.bench_with_input( + BenchmarkId::new("restore", format!("{}files", num_files)), + num_files, + |b, &num_files| { + let rt = Runtime::new().unwrap(); + + // Setup: create snapshot once + let temp_dir = TempDir::new().unwrap(); + let workspace = temp_dir.path().join("workspace"); + let store_dir = temp_dir.path().join("store"); + + rt.block_on(async { + create_test_workspace(&workspace, num_files, 100).await; + }); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = rt.block_on(async { + manager + .create_snapshot(&workspace, None, None) + .await + .unwrap() + }); + + // Benchmark: restore only + b.to_async(&rt).iter(|| async { + let restore_dir = temp_dir + .path() + .join(format!("restore_{}", uuid::Uuid::new_v4())); + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + black_box(()) + }); + }, + ); + } + + group.finish(); +} + +/// Benchmark: Nested directory structure scaling +/// +/// **Purpose:** Test flat vs nested directory structures +/// **Tests:** Different directory depths with same total files +/// **Expected:** Similar performance (tree structure handles both) +/// **Detects:** Directory traversal bottlenecks +fn bench_directory_depth(c: &mut Criterion) { + let mut group = c.benchmark_group("directory_depth"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(10)); + + // Test with different directory structures + for (depth, files_per_dir) in [(1, 1000), (5, 200), (10, 100)].iter() { + let total_files = depth * files_per_dir; + group.throughput(Throughput::Elements(total_files as u64)); + + // Benchmark: Snapshot creation + group.bench_with_input( + BenchmarkId::new("snapshot", format!("depth{}_total{}", depth, total_files)), + &(depth, files_per_dir), + |b, &(depth, files_per_dir)| { + let rt = Runtime::new().unwrap(); + + 
b.to_async(&rt).iter(|| async { + let temp_dir = TempDir::new().unwrap(); + let workspace = temp_dir.path().join("workspace"); + let store_dir = temp_dir.path().join("store"); + + // Create nested directory structure + tokio::fs::create_dir_all(&workspace).await.unwrap(); + for d in 0..*depth { + let dir = workspace.join(format!("dir_{}", d)); + tokio::fs::create_dir_all(&dir).await.unwrap(); + for f in 0..*files_per_dir { + let file = dir.join(format!("file_{}.txt", f)); + let content = vec![b'A'; 100]; + tokio::fs::write(&file, content).await.unwrap(); + } + } + + let manager = SnapshotManager::new(store_dir).unwrap(); + manager + .create_snapshot(&workspace, None, None) + .await + .unwrap(); + + black_box(()) + }); + }, + ); + + // Benchmark: Restore + group.bench_with_input( + BenchmarkId::new("restore", format!("depth{}_total{}", depth, total_files)), + &(depth, files_per_dir), + |b, &(depth, files_per_dir)| { + let rt = Runtime::new().unwrap(); + + // Setup: create snapshot once + let temp_dir = TempDir::new().unwrap(); + let workspace = temp_dir.path().join("workspace"); + let store_dir = temp_dir.path().join("store"); + + rt.block_on(async { + tokio::fs::create_dir_all(&workspace).await.unwrap(); + for d in 0..*depth { + let dir = workspace.join(format!("dir_{}", d)); + tokio::fs::create_dir_all(&dir).await.unwrap(); + for f in 0..*files_per_dir { + let file = dir.join(format!("file_{}.txt", f)); + let content = vec![b'A'; 100]; + tokio::fs::write(&file, content).await.unwrap(); + } + } + }); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = rt.block_on(async { + manager + .create_snapshot(&workspace, None, None) + .await + .unwrap() + }); + + // Benchmark: restore only + b.to_async(&rt).iter(|| async { + let restore_dir = temp_dir + .path() + .join(format!("restore_{}", uuid::Uuid::new_v4())); + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + black_box(()) + }); + }, + ); + } + + 
group.finish(); +} + +criterion_group!( + benches, + bench_small_files, + bench_large_files, + bench_file_count_scaling, + bench_directory_depth +); +criterion_main!(benches); diff --git a/sandd/src/lib.rs b/sandd/src/lib.rs new file mode 100644 index 0000000..5e75756 --- /dev/null +++ b/sandd/src/lib.rs @@ -0,0 +1,4 @@ +// SandD Library +// Re-export public modules for use in examples and external crates + +pub mod snapshot; diff --git a/sandd/src/main.rs b/sandd/src/main.rs index 9805586..370aecb 100644 --- a/sandd/src/main.rs +++ b/sandd/src/main.rs @@ -1,6 +1,7 @@ mod executor; mod protocol; mod session; +pub mod snapshot; use anyhow::{Context, Result}; use clap::Parser; diff --git a/sandd/src/snapshot/manager.rs b/sandd/src/snapshot/manager.rs new file mode 100644 index 0000000..c2f1426 --- /dev/null +++ b/sandd/src/snapshot/manager.rs @@ -0,0 +1,694 @@ +use crate::snapshot::object_store::ObjectStore; +use crate::snapshot::tree::{get_mode, set_mode, set_mtime, EntryType, Tree, TreeEntry}; +use crate::snapshot::types::{Snapshot, SnapshotId, SnapshotInfo}; +use anyhow::{Context, Result}; +use std::path::{Path, PathBuf}; +use std::time::SystemTime; +use tokio::fs; +use uuid::Uuid; + +pub struct SnapshotManager { + store: ObjectStore, + snapshots_dir: PathBuf, +} + +impl SnapshotManager { + pub fn new(root: PathBuf) -> Result { + let store = ObjectStore::new(root.clone()); + let snapshots_dir = root.join("snapshots"); + + std::fs::create_dir_all(&snapshots_dir).with_context(|| { + format!( + "Failed to create snapshots directory: {}", + snapshots_dir.display() + ) + })?; + + Ok(Self { + store, + snapshots_dir, + }) + } + + /// Create a snapshot of workspace + pub async fn create_snapshot( + &self, + workspace: &Path, + message: Option, + tags: Option>, + ) -> Result { + let snapshot_id = Uuid::new_v4().to_string(); + + // Build tree recursively + let (tree_hash, file_count, total_size) = self.build_tree(workspace).await?; + + // Create snapshot metadata + let 
snapshot = Snapshot { + id: snapshot_id.clone(), + created_at: SystemTime::now(), + tree: tree_hash, + message: message.unwrap_or_else(|| format!("Snapshot {}", snapshot_id)), + tags: tags.unwrap_or_default(), + workspace_path: workspace.to_path_buf(), + file_count, + total_size, + }; + + // Save snapshot + let snapshot_file = self.snapshots_dir.join(format!("{}.json", snapshot_id)); + let json = serde_json::to_string_pretty(&snapshot)?; + + // Atomic write + let temp_file = snapshot_file.with_extension("tmp"); + fs::write(&temp_file, json).await?; + fs::rename(temp_file, snapshot_file).await?; + + Ok(snapshot_id) + } + + /// Build tree recursively, return (tree_hash, file_count, total_size) + fn build_tree<'a>( + &'a self, + dir: &'a Path, + ) -> std::pin::Pin> + 'a>> + { + Box::pin(async move { + let mut entries = Vec::new(); + let mut file_count = 0usize; + let mut total_size = 0u64; + + let mut read_dir = fs::read_dir(dir).await?; + while let Some(entry) = read_dir.next_entry().await? 
{ + let path = entry.path(); + let metadata = entry.metadata().await?; + let name = entry.file_name().to_string_lossy().to_string(); + + let (entry_type, hash, size, _sub_count) = if metadata.is_file() { + // Store file as blob object + let hash = self.store.put_file(&path).await?; + let size = metadata.len(); + total_size += size; + file_count += 1; + (EntryType::Blob, hash, size, 0) + } else if metadata.is_dir() { + // Recursively build tree object for subdirectory + let (hash, sub_count, sub_size) = self.build_tree(&path).await?; + total_size += sub_size; + file_count += sub_count; + (EntryType::Tree, hash, 0, sub_count) + } else if metadata.is_symlink() { + // Store symlink target as blob object + let target = fs::read_link(&path).await?; + let target_bytes = target.to_string_lossy().as_bytes().to_vec(); + let hash = self.store.put_blob(&target_bytes).await?; + file_count += 1; + (EntryType::Symlink, hash, 0, 0) + } else { + continue; // Skip other types + }; + + entries.push(TreeEntry { + name, + mode: get_mode(&metadata), + entry_type, + hash, + size, + modified: metadata.modified()?, + }); + } + + // Create and store tree object (JSON) + let tree = Tree { entries }; + let tree_json = serde_json::to_vec(&tree)?; + let tree_hash = self.store.put_blob(&tree_json).await?; + + Ok((tree_hash, file_count, total_size)) + }) + } + + /// Restore snapshot to destination + pub async fn restore_snapshot(&self, snapshot_id: &str, dest: &Path) -> Result<()> { + // Load snapshot + let snapshot_file = self.snapshots_dir.join(format!("{}.json", snapshot_id)); + let json = fs::read_to_string(snapshot_file) + .await + .with_context(|| format!("Snapshot {} not found", snapshot_id))?; + let snapshot: Snapshot = serde_json::from_str(&json)?; + + // Restore tree recursively + self.restore_tree(&snapshot.tree, dest).await?; + + Ok(()) + } + + /// Restore tree recursively + fn restore_tree<'a>( + &'a self, + tree_hash: &'a str, + dest: &'a Path, + ) -> std::pin::Pin> + 'a>> { + 
Box::pin(async move { + fs::create_dir_all(dest).await?; + + // Load tree object + let tree_json = self.store.get_blob(tree_hash).await?; + let tree: Tree = serde_json::from_slice(&tree_json)?; + + // Restore each entry + for entry in tree.entries { + let entry_path = dest.join(&entry.name); + + match entry.entry_type { + EntryType::Blob => { + // Restore file from blob object + self.store.copy_file(&entry.hash, &entry_path).await?; + set_mode(&entry_path, entry.mode)?; + set_mtime(&entry_path, entry.modified)?; + } + EntryType::Tree => { + // Recursively restore subdirectory from tree object + self.restore_tree(&entry.hash, &entry_path).await?; + } + EntryType::Symlink => { + // Restore symlink from blob object (target path) + let target_bytes = self.store.get_blob(&entry.hash).await?; + let target = PathBuf::from(String::from_utf8(target_bytes)?); + + #[cfg(unix)] + tokio::fs::symlink(target, entry_path).await?; + + #[cfg(windows)] + { + if target.is_dir() { + tokio::fs::symlink_dir(target, entry_path).await?; + } else { + tokio::fs::symlink_file(target, entry_path).await?; + } + } + } + } + } + + Ok(()) + }) + } + + /// List all snapshots (optionally filtered by tags) + pub async fn list_snapshots( + &self, + filter_tags: Option>, + ) -> Result> { + let mut snapshots = Vec::new(); + + let mut entries = fs::read_dir(&self.snapshots_dir).await?; + while let Some(entry) = entries.next_entry().await? 
{ + let path = entry.path(); + if path.extension().and_then(|s| s.to_str()) != Some("json") { + continue; + } + + let json = fs::read_to_string(&path).await?; + let snapshot: Snapshot = serde_json::from_str(&json)?; + + // Filter by tags if specified + if let Some(ref filter) = filter_tags { + if !filter.iter().any(|tag| snapshot.tags.contains(tag)) { + continue; + } + } + + snapshots.push(snapshot.into()); + } + + // Sort by creation time (newest first) + snapshots.sort_by(|a: &SnapshotInfo, b: &SnapshotInfo| b.created_at.cmp(&a.created_at)); + + Ok(snapshots) + } + + /// Find snapshots by tag + pub async fn find_by_tag(&self, tag: &str) -> Result> { + self.list_snapshots(Some(vec![tag.to_string()])).await + } + + /// Get snapshot by ID + pub async fn get_snapshot(&self, id: &str) -> Result { + let snapshot_file = self.snapshots_dir.join(format!("{}.json", id)); + let json = fs::read_to_string(snapshot_file) + .await + .with_context(|| format!("Snapshot {} not found", id))?; + let snapshot: Snapshot = serde_json::from_str(&json)?; + Ok(snapshot) + } + + /// Delete snapshot + /// TODO: This will remove the snapshot metadata file, but the underlying objects in the object + /// store will remain. 
+ pub async fn delete_snapshot(&self, id: &str) -> Result<()> { + let snapshot_file = self.snapshots_dir.join(format!("{}.json", id)); + fs::remove_file(snapshot_file) + .await + .with_context(|| format!("Failed to delete snapshot {}", id))?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn test_create_and_restore_snapshot() { + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + let restore_dir = temp_dir.path().join("restored"); + + // Create test workspace + fs::create_dir_all(&workspace).await.unwrap(); + fs::write(workspace.join("file1.txt"), "Hello") + .await + .unwrap(); + fs::create_dir_all(workspace.join("subdir")).await.unwrap(); + fs::write(workspace.join("subdir/file2.txt"), "World") + .await + .unwrap(); + + // Create snapshot + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = manager + .create_snapshot(&workspace, Some("Test snapshot".to_string()), None) + .await + .unwrap(); + + // Restore snapshot + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + // Verify restored files + let content1 = fs::read_to_string(restore_dir.join("file1.txt")) + .await + .unwrap(); + assert_eq!(content1, "Hello"); + + let content2 = fs::read_to_string(restore_dir.join("subdir/file2.txt")) + .await + .unwrap(); + assert_eq!(content2, "World"); + } + + #[tokio::test] + async fn test_timestamp_preservation() { + use std::time::Duration; + + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + let restore_dir = temp_dir.path().join("restored"); + + // Create test file + fs::create_dir_all(&workspace).await.unwrap(); + let test_file = workspace.join("test.txt"); + fs::write(&test_file, "content").await.unwrap(); + + // Get original timestamp + let original_metadata = 
fs::metadata(&test_file).await.unwrap(); + let original_mtime = original_metadata.modified().unwrap(); + + // Wait a bit to ensure timestamps would differ + tokio::time::sleep(Duration::from_millis(100)).await; + + // Create snapshot + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = manager + .create_snapshot(&workspace, Some("Test".to_string()), None) + .await + .unwrap(); + + // Wait again + tokio::time::sleep(Duration::from_millis(100)).await; + + // Restore snapshot + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + // Check restored file has original timestamp + let restored_metadata = fs::metadata(restore_dir.join("test.txt")).await.unwrap(); + let restored_mtime = restored_metadata.modified().unwrap(); + + // Timestamps should match (within 1 second for filesystem precision) + let diff = if restored_mtime > original_mtime { + restored_mtime.duration_since(original_mtime).unwrap() + } else { + original_mtime.duration_since(restored_mtime).unwrap() + }; + + assert!( + diff < Duration::from_secs(1), + "Timestamp should be preserved. 
Original: {:?}, Restored: {:?}", + original_mtime, + restored_mtime + ); + } + + #[tokio::test] + async fn test_list_snapshots() { + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + + fs::create_dir_all(&workspace).await.unwrap(); + fs::write(workspace.join("test.txt"), "content") + .await + .unwrap(); + + let manager = SnapshotManager::new(store_dir).unwrap(); + + // Create multiple snapshots + let _id1 = manager + .create_snapshot(&workspace, Some("First".to_string()), Some(vec!["tag1".to_string()])) + .await + .unwrap(); + + let _id2 = manager + .create_snapshot(&workspace, Some("Second".to_string()), Some(vec!["tag2".to_string()])) + .await + .unwrap(); + + // List all snapshots + let snapshots = manager.list_snapshots(None).await.unwrap(); + assert_eq!(snapshots.len(), 2); + assert_eq!(snapshots[0].message, "Second"); // Newest first + assert_eq!(snapshots[1].message, "First"); + + // Filter by tag + let tag1_snapshots = manager + .list_snapshots(Some(vec!["tag1".to_string()])) + .await + .unwrap(); + assert_eq!(tag1_snapshots.len(), 1); + assert_eq!(tag1_snapshots[0].message, "First"); + + // Find by tag + let tag2_snapshots = manager.find_by_tag("tag2").await.unwrap(); + assert_eq!(tag2_snapshots.len(), 1); + assert_eq!(tag2_snapshots[0].message, "Second"); + } + + #[tokio::test] + async fn test_binary_files() { + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + let restore_dir = temp_dir.path().join("restored"); + + fs::create_dir_all(&workspace).await.unwrap(); + let binary_data = vec![0x00, 0xFF, 0xAB, 0xCD, 0x12, 0x34]; + fs::write(workspace.join("binary.dat"), &binary_data) + .await + .unwrap(); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = manager + .create_snapshot(&workspace, Some("Binary test".to_string()), None) + .await + 
.unwrap(); + + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + let restored = fs::read(restore_dir.join("binary.dat")).await.unwrap(); + assert_eq!(restored, binary_data); + } + + #[tokio::test] + async fn test_empty_directories() { + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + let restore_dir = temp_dir.path().join("restored"); + + fs::create_dir_all(&workspace).await.unwrap(); + fs::create_dir_all(workspace.join("empty")).await.unwrap(); + fs::create_dir_all(workspace.join("nested/empty/dirs")) + .await + .unwrap(); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = manager + .create_snapshot(&workspace, Some("Empty dirs test".to_string()), None) + .await + .unwrap(); + + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + assert!(restore_dir.join("empty").is_dir()); + assert!(restore_dir.join("nested/empty/dirs").is_dir()); + } + + #[tokio::test] + async fn test_deduplication() { + use std::time::Duration; + + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + + fs::create_dir_all(&workspace).await.unwrap(); + + let manager = SnapshotManager::new(store_dir.clone()).unwrap(); + + // Create first file with content + let content = "Same content in multiple files"; + fs::write(workspace.join("file1.txt"), content).await.unwrap(); + + // Create first snapshot + manager + .create_snapshot(&workspace, Some("First".to_string()), None) + .await + .unwrap(); + + // Get blob creation time + let objects_dir = store_dir.join("objects"); + let mut first_blob_path = None; + for entry in walkdir::WalkDir::new(&objects_dir).into_iter().filter_map(|e| e.ok()) { + if entry.file_type().is_file() { + let data = std::fs::read(entry.path()).unwrap(); + if data == content.as_bytes() { + first_blob_path = 
Some(entry.path().to_path_buf()); + break; + } + } + } + + let first_blob_path = first_blob_path.expect("Should find content blob"); + let first_created = std::fs::metadata(&first_blob_path) + .unwrap() + .modified() + .unwrap(); + + // Wait to ensure timestamp would differ if recreated + tokio::time::sleep(Duration::from_millis(100)).await; + + // Add more files with same content + fs::write(workspace.join("file2.txt"), content).await.unwrap(); + fs::write(workspace.join("file3.txt"), content).await.unwrap(); + + // Create second snapshot + manager + .create_snapshot(&workspace, Some("Second".to_string()), None) + .await + .unwrap(); + + // Verify blob wasn't recreated (timestamp unchanged) + let second_created = std::fs::metadata(&first_blob_path) + .unwrap() + .modified() + .unwrap(); + + assert_eq!( + first_created, second_created, + "Blob should not be recreated - timestamp should be unchanged" + ); + + // Count unique content blobs + let mut content_blob_count = 0; + for entry in walkdir::WalkDir::new(&objects_dir).into_iter().filter_map(|e| e.ok()) { + if entry.file_type().is_file() { + let data = std::fs::read(entry.path()).unwrap(); + if data == content.as_bytes() { + content_blob_count += 1; + } + } + } + + assert_eq!(content_blob_count, 1, "Content should be stored exactly once"); + } + + #[tokio::test] + async fn test_special_filenames() { + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + let restore_dir = temp_dir.path().join("restored"); + + fs::create_dir_all(&workspace).await.unwrap(); + fs::write(workspace.join("file with spaces.txt"), "Spaces") + .await + .unwrap(); + fs::write(workspace.join("file-with-dashes.txt"), "Dashes") + .await + .unwrap(); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = manager + .create_snapshot(&workspace, Some("Special names".to_string()), None) + .await + .unwrap(); + + manager + 
.restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + assert_eq!( + fs::read_to_string(restore_dir.join("file with spaces.txt")) + .await + .unwrap(), + "Spaces" + ); + assert_eq!( + fs::read_to_string(restore_dir.join("file-with-dashes.txt")) + .await + .unwrap(), + "Dashes" + ); + } + + #[tokio::test] + #[cfg(unix)] + async fn test_symlinks() { + use std::os::unix::fs::symlink; + + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + let restore_dir = temp_dir.path().join("restored"); + + fs::create_dir_all(&workspace).await.unwrap(); + fs::write(workspace.join("target.txt"), "Target") + .await + .unwrap(); + symlink(workspace.join("target.txt"), workspace.join("link.txt")).unwrap(); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = manager + .create_snapshot(&workspace, Some("Symlink test".to_string()), None) + .await + .unwrap(); + + manager + .restore_snapshot(&snapshot_id, &restore_dir) + .await + .unwrap(); + + let link_target = fs::read_link(restore_dir.join("link.txt")).await.unwrap(); + assert!(link_target.to_string_lossy().contains("target.txt")); + assert_eq!( + fs::read_to_string(restore_dir.join("target.txt")) + .await + .unwrap(), + "Target" + ); + } + + #[tokio::test] + async fn test_get_snapshot() { + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + + fs::create_dir_all(&workspace).await.unwrap(); + fs::write(workspace.join("file.txt"), "Content") + .await + .unwrap(); + + let manager = SnapshotManager::new(store_dir).unwrap(); + let snapshot_id = manager + .create_snapshot( + &workspace, + Some("Test message".to_string()), + Some(vec!["tag1".to_string(), "tag2".to_string()]), + ) + .await + .unwrap(); + + // Get snapshot by ID + let snapshot = manager.get_snapshot(&snapshot_id).await.unwrap(); + + 
assert_eq!(snapshot.id, snapshot_id); + assert_eq!(snapshot.message, "Test message"); + assert_eq!(snapshot.tags, vec!["tag1", "tag2"]); + assert_eq!(snapshot.file_count, 1); + assert_eq!(snapshot.workspace_path, workspace); + + // Try getting non-existent snapshot + let result = manager.get_snapshot("non-existent-id").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_delete_snapshot() { + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + + fs::create_dir_all(&workspace).await.unwrap(); + fs::write(workspace.join("file.txt"), "Content") + .await + .unwrap(); + + let manager = SnapshotManager::new(store_dir).unwrap(); + + // Create two snapshots + let snap1_id = manager + .create_snapshot(&workspace, Some("Snapshot 1".to_string()), None) + .await + .unwrap(); + + let snap2_id = manager + .create_snapshot(&workspace, Some("Snapshot 2".to_string()), None) + .await + .unwrap(); + + // List should have 2 snapshots + let snapshots = manager.list_snapshots(None).await.unwrap(); + assert_eq!(snapshots.len(), 2); + + // Delete first snapshot + manager.delete_snapshot(&snap1_id).await.unwrap(); + + // List should now have 1 snapshot + let snapshots = manager.list_snapshots(None).await.unwrap(); + assert_eq!(snapshots.len(), 1); + assert_eq!(snapshots[0].id, snap2_id); + + // Getting deleted snapshot should fail + let result = manager.get_snapshot(&snap1_id).await; + assert!(result.is_err()); + + // Deleting non-existent snapshot should fail + let result = manager.delete_snapshot("non-existent-id").await; + assert!(result.is_err()); + } +} diff --git a/sandd/src/snapshot/mod.rs b/sandd/src/snapshot/mod.rs new file mode 100644 index 0000000..67ad05d --- /dev/null +++ b/sandd/src/snapshot/mod.rs @@ -0,0 +1,8 @@ +pub mod object_store; +pub mod tree; +pub mod manager; +pub mod types; + +pub use manager::SnapshotManager; +pub use types::{SnapshotId, SnapshotInfo, 
Snapshot}; +pub use object_store::ObjectStore; diff --git a/sandd/src/snapshot/object_store.rs b/sandd/src/snapshot/object_store.rs new file mode 100644 index 0000000..8681b48 --- /dev/null +++ b/sandd/src/snapshot/object_store.rs @@ -0,0 +1,216 @@ +use anyhow::{Context, Result}; +use std::path::{Path, PathBuf}; +use tokio::fs; +use tokio::io::AsyncReadExt; + +pub struct ObjectStore { + root: PathBuf, +} + +impl ObjectStore { + pub fn new(root: PathBuf) -> Self { + Self { root } + } + + /// Store a blob (arbitrary bytes), return its hash + pub async fn put_blob(&self, content: &[u8]) -> Result { + let hash = blake3::hash(content); + let hash_hex = hash.to_hex().to_string(); + + // Check if already exists (deduplication!) + let object_path = self.hash_to_path(&hash_hex); + if object_path.exists() { + return Ok(hash_hex); + } + + // Store object: objects/ab/cdef123... + fs::create_dir_all(object_path.parent().unwrap()).await?; + + // Atomic write (temp + rename, like Git) + let temp_path = object_path.with_extension("tmp"); + fs::write(&temp_path, content).await?; + fs::rename(temp_path, object_path).await?; + + Ok(hash_hex) + } + + /// Store a file by path, return its hash + /// Uses streaming to handle large files without loading entire file into memory + pub async fn put_file(&self, path: &Path) -> Result { + // Stream file in chunks to compute hash + let mut file = fs::File::open(path) + .await + .with_context(|| format!("Failed to open file: {}", path.display()))?; + + let mut hasher = blake3::Hasher::new(); + let mut buffer = vec![0u8; 4 * 1024 * 1024]; // 4MB buffer + + loop { + let n = file.read(&mut buffer).await?; + if n == 0 { + break; + } + hasher.update(&buffer[..n]); + } + + let hash = hasher.finalize(); + let hash_hex = hash.to_hex().to_string(); + + // Check if already exists (deduplication!) 
+ let object_path = self.hash_to_path(&hash_hex); + if object_path.exists() { + return Ok(hash_hex); + } + + // Copy file to object store + fs::create_dir_all(object_path.parent().unwrap()).await?; + + // Atomic write: temp file + rename + let temp_path = object_path.with_extension("tmp"); + fs::copy(path, &temp_path) + .await + .with_context(|| format!("Failed to copy file to object store"))?; + fs::rename(temp_path, object_path).await?; + + Ok(hash_hex) + } + + /// Get blob content by hash + pub async fn get_blob(&self, hash: &str) -> Result> { + let object_path = self.hash_to_path(hash); + fs::read(&object_path) + .await + .with_context(|| format!("Object {} not found", hash)) + } + + /// Copy object to file + pub async fn copy_file(&self, hash: &str, dest: &Path) -> Result<()> { + let object_path = self.hash_to_path(hash); + + // Ensure parent directory exists + if let Some(parent) = dest.parent() { + fs::create_dir_all(parent).await?; + } + + fs::copy(&object_path, dest) + .await + .with_context(|| format!("Failed to copy object {} to {}", hash, dest.display()))?; + Ok(()) + } + + /// Check if object exists + pub fn exists(&self, hash: &str) -> bool { + self.hash_to_path(hash).exists() + } + + /// Convert hash to filesystem path + /// Hash: abc123def456... → objects/ab/c123def456... 
+ fn hash_to_path(&self, hash: &str) -> PathBuf { + self.root + .join("objects") + .join(&hash[..2]) // First 2 chars as subdir + .join(&hash[2..]) // Rest as filename + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn test_put_and_get_blob() { + let temp_dir = TempDir::new().unwrap(); + let store = ObjectStore::new(temp_dir.path().to_path_buf()); + + let content = b"Hello, world!"; + let hash = store.put_blob(content).await.unwrap(); + + let retrieved = store.get_blob(&hash).await.unwrap(); + assert_eq!(content, retrieved.as_slice()); + } + + #[tokio::test] + async fn test_deduplication() { + use std::time::Duration; + + let temp_dir = TempDir::new().unwrap(); + let store = ObjectStore::new(temp_dir.path().to_path_buf()); + + let content = b"Same content"; + + // First write + let hash1 = store.put_blob(content).await.unwrap(); + + // Get blob file path and timestamp + let blob_path = store.hash_to_path(&hash1); + let first_modified = std::fs::metadata(&blob_path) + .unwrap() + .modified() + .unwrap(); + + // Wait to ensure timestamp would differ if file was rewritten + tokio::time::sleep(Duration::from_millis(100)).await; + + // Second write (same content) + let hash2 = store.put_blob(content).await.unwrap(); + + // Verify deduplication + assert_eq!(hash1, hash2, "Same content should produce same hash"); + assert!(store.exists(&hash1), "Blob should exist"); + + // Verify file wasn't rewritten (timestamp unchanged) + let second_modified = std::fs::metadata(&blob_path) + .unwrap() + .modified() + .unwrap(); + + assert_eq!( + first_modified, second_modified, + "Blob file should not be rewritten - timestamp should be unchanged" + ); + } + + #[tokio::test] + async fn test_hash_sharding() { + let temp_dir = TempDir::new().unwrap(); + let store = ObjectStore::new(temp_dir.path().to_path_buf()); + + let hash = "abc123def456789"; + let path = store.hash_to_path(hash); + + // Should create subdirectory based on first 
2 chars + assert!(path.to_string_lossy().contains("/ab/")); + assert!(path.to_string_lossy().contains("c123def456789")); + } + + #[tokio::test] + async fn test_large_file_streaming() { + use tokio::io::AsyncWriteExt; + + let temp_dir = TempDir::new().unwrap(); + let store = ObjectStore::new(temp_dir.path().to_path_buf()); + + // Create a "large" test file (10MB) + let large_file = temp_dir.path().join("large.bin"); + let mut file = tokio::fs::File::create(&large_file).await.unwrap(); + + // Write 10MB of data in chunks (simulates large file) + let chunk = vec![0xAB; 1024 * 1024]; // 1MB chunk + for _ in 0..10 { + file.write_all(&chunk).await.unwrap(); + } + file.flush().await.unwrap(); + drop(file); + + // Store the large file (should stream, not load all into memory) + let hash = store.put_file(&large_file).await.unwrap(); + + // Verify we can retrieve it + assert!(store.exists(&hash)); + + // Verify hash is consistent + let hash2 = store.put_file(&large_file).await.unwrap(); + assert_eq!(hash, hash2, "Same file should produce same hash"); + } +} diff --git a/sandd/src/snapshot/tree.rs b/sandd/src/snapshot/tree.rs new file mode 100644 index 0000000..47646f6 --- /dev/null +++ b/sandd/src/snapshot/tree.rs @@ -0,0 +1,81 @@ +use serde::{Deserialize, Serialize}; +use std::time::SystemTime; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Tree { + pub entries: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TreeEntry { + pub name: String, // File/directory name (not full path) + pub mode: u32, // Unix permissions (e.g., 0o755) + pub entry_type: EntryType, + pub hash: String, // Content hash (BLAKE3) + pub size: u64, // Size in bytes + #[serde(with = "system_time_format")] + pub modified: SystemTime, // Last modified time +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum EntryType { + Blob, // File content (blob object) + Tree, // Subdirectory (tree object) + Symlink, // 
Symbolic link (blob object storing target path) +} + +// Helper module for SystemTime serialization +mod system_time_format { + use serde::{self, Deserialize, Deserializer, Serializer}; + use std::time::{SystemTime, UNIX_EPOCH}; + + pub fn serialize(time: &SystemTime, serializer: S) -> Result + where + S: Serializer, + { + let duration = time + .duration_since(UNIX_EPOCH) + .map_err(serde::ser::Error::custom)?; + serializer.serialize_u64(duration.as_secs()) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let secs = u64::deserialize(deserializer)?; + Ok(UNIX_EPOCH + std::time::Duration::from_secs(secs)) + } +} + +#[cfg(unix)] +pub fn get_mode(metadata: &std::fs::Metadata) -> u32 { + use std::os::unix::fs::PermissionsExt; + metadata.permissions().mode() +} + +#[cfg(not(unix))] +pub fn get_mode(_metadata: &std::fs::Metadata) -> u32 { + 0o644 // Default for non-Unix systems +} + +#[cfg(unix)] +pub fn set_mode(path: &std::path::Path, mode: u32) -> anyhow::Result<()> { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(path, std::fs::Permissions::from_mode(mode))?; + Ok(()) +} + +#[cfg(not(unix))] +pub fn set_mode(_path: &std::path::Path, _mode: u32) -> anyhow::Result<()> { + Ok(()) // No-op on non-Unix systems +} + +pub fn set_mtime(path: &std::path::Path, mtime: SystemTime) -> anyhow::Result<()> { + use filetime::{FileTime, set_file_mtime}; + + let filetime = FileTime::from_system_time(mtime); + set_file_mtime(path, filetime)?; + Ok(()) +} diff --git a/sandd/src/snapshot/types.rs b/sandd/src/snapshot/types.rs new file mode 100644 index 0000000..dd8d2db --- /dev/null +++ b/sandd/src/snapshot/types.rs @@ -0,0 +1,40 @@ +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use std::time::SystemTime; + +pub type SnapshotId = String; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Snapshot { + pub id: SnapshotId, + pub created_at: SystemTime, + pub tree: String, + pub message: 
String, + pub tags: Vec, + pub workspace_path: PathBuf, + pub file_count: usize, + pub total_size: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SnapshotInfo { + pub id: SnapshotId, + pub created_at: SystemTime, + pub message: String, + pub tags: Vec, + pub file_count: usize, + pub total_size: u64, +} + +impl From for SnapshotInfo { + fn from(snapshot: Snapshot) -> Self { + Self { + id: snapshot.id, + created_at: snapshot.created_at, + message: snapshot.message, + tags: snapshot.tags, + file_count: snapshot.file_count, + total_size: snapshot.total_size, + } + } +} From e1ed7552e202b2ec463f29fac83dd8748addd073 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 29 Jun 2026 16:22:31 +0100 Subject: [PATCH 4/7] skip copy paste if the content is same Signed-off-by: kerthcet --- sandd/src/snapshot/manager.rs | 74 ++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/sandd/src/snapshot/manager.rs b/sandd/src/snapshot/manager.rs index c2f1426..5b74322 100644 --- a/sandd/src/snapshot/manager.rs +++ b/sandd/src/snapshot/manager.rs @@ -160,8 +160,32 @@ impl SnapshotManager { match entry.entry_type { EntryType::Blob => { - // Restore file from blob object - self.store.copy_file(&entry.hash, &entry_path).await?; + // Check if file already exists with same content + let should_copy = if entry_path.exists() { + // Compare metadata first (fast check) + if let Ok(metadata) = fs::metadata(&entry_path).await { + if metadata.len() == entry.size + && metadata.modified().ok() == Some(entry.modified) { + // Size and mtime match - likely unchanged, skip copy + false + } else { + // Metadata differs - need to verify with hash + let file_hash = self.store.put_file(&entry_path).await?; + file_hash != entry.hash + } + } else { + true + } + } else { + true + }; + + if should_copy { + // Restore file from blob object + self.store.copy_file(&entry.hash, &entry_path).await?; + } + + // Always update metadata (cheap operation) 
set_mode(&entry_path, entry.mode)?; set_mtime(&entry_path, entry.modified)?; } @@ -575,6 +599,52 @@ mod tests { ); } + #[tokio::test] + async fn test_restore_skip_unchanged() { + use std::time::Duration; + + let temp_dir = TempDir::new().unwrap(); + let store_dir = temp_dir.path().join("store"); + let workspace = temp_dir.path().join("workspace"); + let restore_dir = temp_dir.path().join("restored"); + + fs::create_dir_all(&workspace).await.unwrap(); + fs::write(workspace.join("file.txt"), "content").await.unwrap(); + + let manager = SnapshotManager::new(store_dir.clone()).unwrap(); + let snapshot_id = manager + .create_snapshot(&workspace, Some("Test".to_string()), None) + .await + .unwrap(); + + // First restore + manager.restore_snapshot(&snapshot_id, &restore_dir).await.unwrap(); + + // Get file timestamp after first restore + let first_timestamp = std::fs::metadata(restore_dir.join("file.txt")) + .unwrap() + .modified() + .unwrap(); + + // Wait to ensure timestamp would differ if file was rewritten + tokio::time::sleep(Duration::from_millis(100)).await; + + // Second restore to same location + manager.restore_snapshot(&snapshot_id, &restore_dir).await.unwrap(); + + // File should NOT be rewritten (timestamp unchanged) + let second_timestamp = std::fs::metadata(restore_dir.join("file.txt")) + .unwrap() + .modified() + .unwrap(); + + // Timestamps should match (file was not recopied) + assert_eq!( + first_timestamp, second_timestamp, + "File should not be recopied if unchanged" + ); + } + #[tokio::test] #[cfg(unix)] async fn test_symlinks() { From 1ba4bd4f55bc2e48d4450d4c8f15cf3d5e2efaac Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 29 Jun 2026 16:34:50 +0100 Subject: [PATCH 5/7] udpate tests Signed-off-by: kerthcet --- sandd/benches/snapshot_bench.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sandd/benches/snapshot_bench.rs b/sandd/benches/snapshot_bench.rs index 914b1c5..1676164 100644 --- 
a/sandd/benches/snapshot_bench.rs +++ b/sandd/benches/snapshot_bench.rs @@ -99,7 +99,7 @@ fn bench_small_files(c: &mut Criterion) { /// Benchmark: Large files /// /// **Purpose:** I/O throughput measurement -/// **Tests:** Binary/media files (1MB, 10MB, 100MB) +/// **Tests:** Binary/media files (1MB, 10MB, 20MB, 100MB) /// **Expected:** Should show MB/sec throughput (I/O bound) /// **Detects:** Buffer size issues, streaming efficiency fn bench_large_files(c: &mut Criterion) { @@ -107,8 +107,15 @@ fn bench_large_files(c: &mut Criterion) { group.sample_size(10); group.measurement_time(std::time::Duration::from_secs(10)); - // Files of different sizes: 1MB, 10MB, 100MB - for file_size in [1024 * 1024, 10 * 1024 * 1024, 100 * 1024 * 1024].iter() { + // Files of different sizes: 1MB, 10MB, 20MB, 100MB + for file_size in [ + 1024 * 1024, + 10 * 1024 * 1024, + 20 * 1024 * 1024, + 100 * 1024 * 1024, + ] + .iter() + { let size_mb = file_size / (1024 * 1024); group.throughput(Throughput::Bytes(*file_size as u64)); From 62f407a11d4cd9036fb4bd246782319e53e391c6 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 29 Jun 2026 17:15:48 +0100 Subject: [PATCH 6/7] fix hash_to_path Signed-off-by: kerthcet --- sandd/src/snapshot/object_store.rs | 54 ++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/sandd/src/snapshot/object_store.rs b/sandd/src/snapshot/object_store.rs index 8681b48..1072e66 100644 --- a/sandd/src/snapshot/object_store.rs +++ b/sandd/src/snapshot/object_store.rs @@ -18,7 +18,7 @@ impl ObjectStore { let hash_hex = hash.to_hex().to_string(); // Check if already exists (deduplication!) - let object_path = self.hash_to_path(&hash_hex); + let object_path = self.hash_to_path(&hash_hex)?; if object_path.exists() { return Ok(hash_hex); } @@ -57,7 +57,7 @@ impl ObjectStore { let hash_hex = hash.to_hex().to_string(); // Check if already exists (deduplication!) 
- let object_path = self.hash_to_path(&hash_hex); + let object_path = self.hash_to_path(&hash_hex)?; if object_path.exists() { return Ok(hash_hex); } @@ -77,7 +77,7 @@ impl ObjectStore { /// Get blob content by hash pub async fn get_blob(&self, hash: &str) -> Result> { - let object_path = self.hash_to_path(hash); + let object_path = self.hash_to_path(hash)?; fs::read(&object_path) .await .with_context(|| format!("Object {} not found", hash)) @@ -85,7 +85,7 @@ impl ObjectStore { /// Copy object to file pub async fn copy_file(&self, hash: &str, dest: &Path) -> Result<()> { - let object_path = self.hash_to_path(hash); + let object_path = self.hash_to_path(hash)?; // Ensure parent directory exists if let Some(parent) = dest.parent() { @@ -100,16 +100,29 @@ impl ObjectStore { /// Check if object exists pub fn exists(&self, hash: &str) -> bool { - self.hash_to_path(hash).exists() + self.hash_to_path(hash).map(|p| p.exists()).unwrap_or(false) } /// Convert hash to filesystem path /// Hash: abc123def456... → objects/ab/c123def456... 
- fn hash_to_path(&self, hash: &str) -> PathBuf { - self.root + /// + /// # Safety + /// Validates hash to prevent panic and path traversal + fn hash_to_path(&self, hash: &str) -> Result { + // Need at least 3 chars to slice safely (hash[..2] and hash[2..]) + if hash.len() < 3 { + anyhow::bail!("Invalid hash: too short (need at least 3 chars)"); + } + + // Validate hex characters only (prevent path traversal like "../") + if !hash.chars().all(|c| c.is_ascii_hexdigit()) { + anyhow::bail!("Invalid hash: must contain only hex characters (0-9, a-f)"); + } + + Ok(self.root .join("objects") .join(&hash[..2]) // First 2 chars as subdir - .join(&hash[2..]) // Rest as filename + .join(&hash[2..])) // Rest as filename } } @@ -143,7 +156,7 @@ mod tests { let hash1 = store.put_blob(content).await.unwrap(); // Get blob file path and timestamp - let blob_path = store.hash_to_path(&hash1); + let blob_path = store.hash_to_path(&hash1).unwrap(); let first_modified = std::fs::metadata(&blob_path) .unwrap() .modified() @@ -177,13 +190,34 @@ mod tests { let store = ObjectStore::new(temp_dir.path().to_path_buf()); let hash = "abc123def456789"; - let path = store.hash_to_path(hash); + let path = store.hash_to_path(hash).unwrap(); // Should create subdirectory based on first 2 chars assert!(path.to_string_lossy().contains("/ab/")); assert!(path.to_string_lossy().contains("c123def456789")); } + #[test] + fn test_hash_validation() { + let temp_dir = TempDir::new().unwrap(); + let store = ObjectStore::new(temp_dir.path().to_path_buf()); + + // Valid hash (hex only, 3+ chars) + assert!(store.hash_to_path("abc123").is_ok()); + assert!(store.hash_to_path("def456789abcdef").is_ok()); + + // Too short (< 3 chars) - should fail + assert!(store.hash_to_path("ab").is_err()); + assert!(store.hash_to_path("a").is_err()); + assert!(store.hash_to_path("").is_err()); + + // Path traversal attempts - should fail (non-hex characters) + assert!(store.hash_to_path("../etc/passwd").is_err()); + 
assert!(store.hash_to_path("..").is_err()); + assert!(store.hash_to_path("ab/../cd").is_err()); + assert!(store.hash_to_path("abc/123").is_err()); + } + #[tokio::test] async fn test_large_file_streaming() { use tokio::io::AsyncWriteExt; From d57a107ab6240958260f47f8af14ee479f09a04d Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 29 Jun 2026 18:45:29 +0100 Subject: [PATCH 7/7] address comments Signed-off-by: kerthcet --- Cargo.lock | 48 ------------------------ docs/proposals/SNAPSHOTS.md | 39 ++++++++++++------- sandd/Cargo.toml | 1 - sandd/src/snapshot/manager.rs | 60 ++++++++++++++++++++++-------- sandd/src/snapshot/object_store.rs | 9 ++++- sandd/src/snapshot/tree.rs | 9 +++-- 6 files changed, 83 insertions(+), 83 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eedd2f9..cf6d2a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,15 +11,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anes" version = "0.1.6" @@ -272,20 +263,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "chrono" -version = "0.4.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "serde", - "wasm-bindgen", - "windows-link", -] - [[package]] name = "ciborium" version = "0.2.2" @@ -918,30 +895,6 @@ dependencies = [ "tower-service", ] -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies 
= [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "id-arena" version = "2.3.0" @@ -1756,7 +1709,6 @@ dependencies = [ "anyhow", "base64", "blake3", - "chrono", "clap", "criterion", "filetime", diff --git a/docs/proposals/SNAPSHOTS.md b/docs/proposals/SNAPSHOTS.md index d58ccb0..4a803ce 100644 --- a/docs/proposals/SNAPSHOTS.md +++ b/docs/proposals/SNAPSHOTS.md @@ -147,8 +147,8 @@ impl SnapshotManager { pub async fn create_snapshot( &self, workspace: &Path, - message: String, - tags: Vec, + message: Option, + tags: Option>, ) -> Result; // Returns snapshot ID /// Restore snapshot to destination @@ -158,14 +158,20 @@ impl SnapshotManager { destination: &Path, ) -> Result<()>; - /// List all snapshots - pub async fn list_snapshots(&self) -> Result>; + /// List all snapshots (optionally filtered by tags) + pub async fn list_snapshots( + &self, + filter_tags: Option>, + ) -> Result>; - /// Delete snapshot and orphaned objects - pub async fn delete_snapshot(&self, id: &str) -> Result<()>; + /// Find snapshots by tag + pub async fn find_by_tag(&self, tag: &str) -> Result>; - /// Garbage collect unreferenced objects - pub async fn gc(&self) -> Result; + /// Get snapshot by ID + pub async fn get_snapshot(&self, id: &str) -> Result; + + /// Delete snapshot + pub async fn delete_snapshot(&self, id: &str) -> Result<()>; } ``` @@ -226,21 +232,28 @@ async fn main() -> Result<()> { PathBuf::from("/var/sandd/snapshots") )?; - // Create snapshot + // Create snapshot with optional message and tags let snapshot_id = manager.create_snapshot( Path::new("/workspace/agent-123"), - "Before task execution".to_string(), - 
vec!["pre-task".to_string()], + Some("Before task execution".to_string()), + Some(vec!["pre-task".to_string()]), ).await?; println!("Created snapshot: {}", snapshot_id); - // List snapshots - let snapshots = manager.list_snapshots().await?; + // List all snapshots + let snapshots = manager.list_snapshots(None).await?; for snap in snapshots { println!("{}: {} (tags: {:?})", snap.id, snap.message, snap.tags); } + // Find snapshots by tag + let pre_task_snapshots = manager.find_by_tag("pre-task").await?; + + // Get specific snapshot details + let snapshot = manager.get_snapshot(&snapshot_id).await?; + println!("Files: {}, Size: {} bytes", snapshot.file_count, snapshot.total_size); + // Restore if needed manager.restore_snapshot( &snapshot_id, diff --git a/sandd/Cargo.toml b/sandd/Cargo.toml index 66df9ca..8d2974b 100644 --- a/sandd/Cargo.toml +++ b/sandd/Cargo.toml @@ -61,7 +61,6 @@ base64 = "0.22" # Snapshot system blake3 = "1.5" walkdir = "2.4" -chrono = { version = "0.4", features = ["serde"] } filetime = "0.2" [dev-dependencies] diff --git a/sandd/src/snapshot/manager.rs b/sandd/src/snapshot/manager.rs index 5b74322..2c4d58f 100644 --- a/sandd/src/snapshot/manager.rs +++ b/sandd/src/snapshot/manager.rs @@ -80,10 +80,34 @@ impl SnapshotManager { let mut read_dir = fs::read_dir(dir).await?; while let Some(entry) = read_dir.next_entry().await? 
{ let path = entry.path(); - let metadata = entry.metadata().await?; + // Use symlink_metadata to NOT follow symlinks + let metadata = fs::symlink_metadata(&path).await?; let name = entry.file_name().to_string_lossy().to_string(); - let (entry_type, hash, size, _sub_count) = if metadata.is_file() { + let (entry_type, hash, size, _sub_count) = if metadata.is_symlink() { + // Check symlink FIRST (before is_file/is_dir which would follow the link) + // Store symlink target as blob object + let target = fs::read_link(&path).await?; + let target_bytes = target.to_string_lossy().as_bytes().to_vec(); + let hash = self.store.put_blob(&target_bytes).await?; + file_count += 1; + + // Determine symlink type (for Windows symlink restoration) + let target_is_dir = if target.is_absolute() { + target.is_dir() + } else { + // Relative path - resolve relative to parent + path.parent().map(|p| p.join(&target).is_dir()).unwrap_or(false) + }; + + let symlink_type = if target_is_dir { + EntryType::SymlinkDir + } else { + EntryType::Symlink + }; + + (symlink_type, hash, 0, 0) + } else if metadata.is_file() { // Store file as blob object let hash = self.store.put_file(&path).await?; let size = metadata.len(); @@ -96,15 +120,13 @@ impl SnapshotManager { total_size += sub_size; file_count += sub_count; (EntryType::Tree, hash, 0, sub_count) - } else if metadata.is_symlink() { - // Store symlink target as blob object - let target = fs::read_link(&path).await?; - let target_bytes = target.to_string_lossy().as_bytes().to_vec(); - let hash = self.store.put_blob(&target_bytes).await?; - file_count += 1; - (EntryType::Symlink, hash, 0, 0) } else { - continue; // Skip other types + // Skip unsupported file types (pipes, sockets, devices, etc.) 
+ tracing::warn!( + "Skipping unsupported file type: {} (not a regular file, directory, or symlink)", + path.display() + ); + continue; }; entries.push(TreeEntry { @@ -193,20 +215,28 @@ impl SnapshotManager { // Recursively restore subdirectory from tree object self.restore_tree(&entry.hash, &entry_path).await?; } - EntryType::Symlink => { + EntryType::Symlink | EntryType::SymlinkDir => { // Restore symlink from blob object (target path) let target_bytes = self.store.get_blob(&entry.hash).await?; let target = PathBuf::from(String::from_utf8(target_bytes)?); + // Remove existing file/symlink if present (for idempotent restore) + if entry_path.exists() || entry_path.is_symlink() { + // Use remove_file for both files and symlinks + let _ = fs::remove_file(&entry_path).await; + } + #[cfg(unix)] - tokio::fs::symlink(target, entry_path).await?; + tokio::fs::symlink(target, &entry_path).await?; #[cfg(windows)] { - if target.is_dir() { - tokio::fs::symlink_dir(target, entry_path).await?; + // Use EntryType to determine symlink type (stored at snapshot time) + // (can't check target.is_dir() since target may not exist yet or be relative) + if entry.entry_type == EntryType::SymlinkDir { + tokio::fs::symlink_dir(target, &entry_path).await?; } else { - tokio::fs::symlink_file(target, entry_path).await?; + tokio::fs::symlink_file(target, &entry_path).await?; } } } diff --git a/sandd/src/snapshot/object_store.rs b/sandd/src/snapshot/object_store.rs index 1072e66..13c4fa7 100644 --- a/sandd/src/snapshot/object_store.rs +++ b/sandd/src/snapshot/object_store.rs @@ -193,8 +193,13 @@ mod tests { let path = store.hash_to_path(hash).unwrap(); // Should create subdirectory based on first 2 chars - assert!(path.to_string_lossy().contains("/ab/")); - assert!(path.to_string_lossy().contains("c123def456789")); + // Use path components instead of string matching (cross-platform) + let components: Vec<_> = path.components().collect(); + + // Path should be: /objects/ab/c123def456789 + 
assert!(components.len() >= 3); + assert_eq!(components[components.len() - 2].as_os_str(), "ab"); + assert_eq!(components[components.len() - 1].as_os_str(), "c123def456789"); } #[test] diff --git a/sandd/src/snapshot/tree.rs b/sandd/src/snapshot/tree.rs index 47646f6..b77e857 100644 --- a/sandd/src/snapshot/tree.rs +++ b/sandd/src/snapshot/tree.rs @@ -18,11 +18,12 @@ pub struct TreeEntry { } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "lowercase")] +#[serde(rename_all = "snake_case")] pub enum EntryType { - Blob, // File content (blob object) - Tree, // Subdirectory (tree object) - Symlink, // Symbolic link (blob object storing target path) + Blob, // File content (blob object) + Tree, // Subdirectory (tree object) + Symlink, // Symlink to file (blob object storing target path) + SymlinkDir, // Symlink to directory (blob object storing target path) } // Helper module for SystemTime serialization