Skip to main content

ssg/
cache.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Content fingerprinting for incremental builds.
5//!
6//! This module provides `BuildCache`, which tracks SHA-256-style
7//! fingerprints of content files so that only files modified since the
8//! last build need to be re-processed.
9//!
10//! # Overview
11//!
12//! 1. On startup, call `BuildCache::load` to read the previous
13//!    fingerprint map from `.ssg-cache.json`.
14//! 2. Call `BuildCache::changed_files` with the content directory to
15//!    obtain the list of files whose contents have changed (or are new).
16//! 3. After a successful build, call `BuildCache::update` to record
17//!    the current fingerprints, then `BuildCache::save` to persist
18//!    them to disk.
19//!
20//! # Example
21//!
22//! ```no_run
23//! use std::path::Path;
24//! use ssg::cache::BuildCache;
25//!
26//! let cache_path = Path::new(".ssg-cache.json");
27//! let content_dir = Path::new("content");
28//!
29//! let mut cache = BuildCache::load(cache_path).unwrap();
30//! let changed = cache.changed_files(content_dir).unwrap();
31//!
32//! // … build only `changed` files …
33//!
34//! cache.update(content_dir).unwrap();
35//! cache.save().unwrap();
36//! ```
37
38use std::collections::HashMap;
39use std::fs;
40use std::path::{Path, PathBuf};
41
42use anyhow::{Context, Result};
43use serde::{Deserialize, Serialize};
44
/// Default file name of the on-disk cache.
///
/// Exposed to callers through [`BuildCache::default_path`].
const DEFAULT_CACHE_FILE: &str = ".ssg-cache.json";
47
/// Persisted fingerprint map used for incremental builds.
///
/// Each entry maps a file path (relative to the content directory) to a
/// hex-encoded hash of that file's contents. Comparing the stored hash
/// against the current hash tells us whether the file has changed.
///
/// Only `fingerprints` takes part in (de)serialization; `cache_path`
/// is skipped and is filled in by [`BuildCache::load`] or
/// [`BuildCache::new`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildCache {
    /// Path to the cache file on disk.
    ///
    /// Skipped by serde, so it is never written into the cache file
    /// and comes back as the default (empty) path after
    /// deserialization until it is reassigned.
    #[serde(skip)]
    cache_path: PathBuf,

    /// Map from relative file paths to their content fingerprints.
    fingerprints: HashMap<PathBuf, String>,
}
62
63impl BuildCache {
64    // -----------------------------------------------------------------
65    // Construction / persistence
66    // -----------------------------------------------------------------
67
68    /// Load a previously saved cache from `cache_path`.
69    ///
70    /// If the file does not exist a fresh, empty cache is returned.
71    /// Any other I/O or parse error is propagated.
72    ///
73    /// # Errors
74    ///
75    /// Returns an error if the file exists but cannot be read or
76    /// contains invalid JSON.
77    pub fn load(cache_path: &Path) -> Result<Self> {
78        if !cache_path.exists() {
79            return Ok(Self {
80                cache_path: cache_path.to_path_buf(),
81                fingerprints: HashMap::new(),
82            });
83        }
84
85        fail_point!("cache::read", |_| {
86            anyhow::bail!("injected: cache::read")
87        });
88        let data = fs::read_to_string(cache_path).with_context(|| {
89            format!("failed to read cache file: {}", cache_path.display())
90        })?;
91
92        fail_point!("cache::parse", |_| {
93            anyhow::bail!("injected: cache::parse")
94        });
95        let mut cache: Self =
96            serde_json::from_str(&data).with_context(|| {
97                format!("failed to parse cache file: {}", cache_path.display())
98            })?;
99
100        cache.cache_path = cache_path.to_path_buf();
101        Ok(cache)
102    }
103
104    /// Create a new empty cache that will be written to `cache_path`.
105    #[must_use]
106    pub fn new(cache_path: &Path) -> Self {
107        Self {
108            cache_path: cache_path.to_path_buf(),
109            fingerprints: HashMap::new(),
110        }
111    }
112
113    /// Persist the current fingerprint map to the cache file.
114    ///
115    /// # Errors
116    ///
117    /// Returns an error if the file cannot be written.
118    pub fn save(&self) -> Result<()> {
119        let json = serde_json::to_string_pretty(self)
120            .context("failed to serialize cache")?;
121        fail_point!("cache::write", |_| {
122            anyhow::bail!("injected: cache::write")
123        });
124        fs::write(&self.cache_path, json).with_context(|| {
125            format!("failed to write cache file: {}", self.cache_path.display())
126        })?;
127        Ok(())
128    }
129
130    // -----------------------------------------------------------------
131    // Fingerprinting helpers
132    // -----------------------------------------------------------------
133
134    /// Compute a deterministic hex fingerprint of the given file.
135    ///
136    /// Uses streaming I/O via `stream::stream_hash` — reads in 8 KB
137    /// chunks so memory usage is constant regardless of file size.
138    fn fingerprint(path: &Path) -> Result<String> {
139        crate::stream::stream_hash(path)
140    }
141
142    /// Recursively collect all files under `dir`, returning paths
143    /// relative to `dir`.
144    fn collect_files(dir: &Path) -> Result<Vec<PathBuf>> {
145        let mut files = Vec::new();
146        if !dir.exists() {
147            return Ok(files);
148        }
149        Self::walk(dir, dir, &mut files)?;
150        files.sort();
151        Ok(files)
152    }
153
154    /// Recursive directory walker.
155    fn walk(base: &Path, current: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
156        let entries = fs::read_dir(current).with_context(|| {
157            format!("cannot read directory: {}", current.display())
158        })?;
159        for entry in entries {
160            let entry = entry?;
161            let path = entry.path();
162            if path.is_dir() {
163                Self::walk(base, &path, out)?;
164            } else {
165                let rel = path
166                    .strip_prefix(base)
167                    .with_context(|| "strip_prefix failed")?;
168                out.push(rel.to_path_buf());
169            }
170        }
171        Ok(())
172    }
173
174    // -----------------------------------------------------------------
175    // Public query / mutation API
176    // -----------------------------------------------------------------
177
178    /// Return the set of files in `content_dir` that have changed since
179    /// the fingerprints were last recorded, plus any newly added files.
180    ///
181    /// Deleted files (present in cache but absent on disk) are *not*
182    /// included in the returned list, but they will be removed from the
183    /// internal map on the next [`update`](Self::update) call.
184    ///
185    /// The returned paths are **absolute**.
186    ///
187    /// # Errors
188    ///
189    /// Returns an error if `content_dir` cannot be read or individual
190    /// files cannot be hashed.
191    pub fn changed_files(&self, content_dir: &Path) -> Result<Vec<PathBuf>> {
192        let files = Self::collect_files(content_dir)?;
193        let mut changed = Vec::new();
194
195        for rel in &files {
196            let abs = content_dir.join(rel);
197            let hash = Self::fingerprint(&abs)?;
198
199            match self.fingerprints.get(rel) {
200                Some(cached) if *cached == hash => {
201                    // unchanged -- skip
202                }
203                _ => {
204                    changed.push(abs);
205                }
206            }
207        }
208
209        Ok(changed)
210    }
211
212    /// Re-scan `content_dir` and replace the entire fingerprint map
213    /// with fresh hashes.
214    ///
215    /// Call this after a successful build so the next invocation of
216    /// [`changed_files`](Self::changed_files) reflects the new state.
217    ///
218    /// # Errors
219    ///
220    /// Returns an error if files cannot be read.
221    pub fn update(&mut self, content_dir: &Path) -> Result<()> {
222        let files = Self::collect_files(content_dir)?;
223        let mut map = HashMap::with_capacity(files.len());
224
225        for rel in files {
226            let abs = content_dir.join(&rel);
227            let hash = Self::fingerprint(&abs)?;
228            let _prev = map.insert(rel, hash);
229        }
230
231        self.fingerprints = map;
232        Ok(())
233    }
234
235    /// Return the number of entries currently in the fingerprint map.
236    #[must_use]
237    pub fn len(&self) -> usize {
238        self.fingerprints.len()
239    }
240
241    /// Return `true` if the fingerprint map is empty.
242    #[must_use]
243    pub fn is_empty(&self) -> bool {
244        self.fingerprints.is_empty()
245    }
246
247    /// Return the path to the default cache file relative to the
248    /// project root.
249    #[must_use]
250    pub const fn default_path() -> &'static str {
251        DEFAULT_CACHE_FILE
252    }
253}
254
255// =====================================================================
256// Tests
257// =====================================================================
#[cfg(test)]
#[allow(unused_results, clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Helper: create a temp dir containing an empty `content`
    /// directory and return `(tmp_dir, content_dir, cache_path)`.
    ///
    /// No content files and no cache file are created. Callers bind
    /// the returned `TempDir` (as `_tmp`) so it lives for the whole
    /// test body.
    fn setup() -> (TempDir, PathBuf, PathBuf) {
        let tmp = TempDir::new().expect("create temp dir");
        let content = tmp.path().join("content");
        fs::create_dir_all(&content).expect("create content dir");
        let cache_path = tmp.path().join(".ssg-cache.json");
        (tmp, content, cache_path)
    }

    /// Helper: write `contents` to `dir/name`, creating intermediate
    /// directories as needed. Panics on failure so a broken fixture is
    /// reported at its source instead of as a later assertion failure.
    fn write_file(dir: &Path, name: &str, contents: &str) {
        let p = dir.join(name);
        if let Some(parent) = p.parent() {
            fs::create_dir_all(parent).expect("create parent dirs");
        }
        fs::write(&p, contents).expect("write fixture file");
    }

    // 1. Loading a missing cache yields an empty map.
    #[test]
    fn load_missing_cache() {
        let tmp = TempDir::new().unwrap();
        let cache_path = tmp.path().join("nonexistent.json");
        let cache = BuildCache::load(&cache_path).unwrap();
        assert!(cache.is_empty());
    }

    // 2. Loading a valid cache round-trips correctly.
    #[test]
    fn load_valid_cache() {
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "a.md", "hello");

        let mut cache = BuildCache::load(&cache_path).unwrap();
        cache.update(&content).unwrap();
        cache.save().unwrap();

        let loaded = BuildCache::load(&cache_path).unwrap();
        assert_eq!(loaded.len(), 1);
    }

    // 3. Detect changed files.
    #[test]
    fn detect_changes() {
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "a.md", "v1");

        let mut cache = BuildCache::load(&cache_path).unwrap();
        cache.update(&content).unwrap();
        cache.save().unwrap();

        // Modify the file.
        write_file(&content, "a.md", "v2");

        let cache2 = BuildCache::load(&cache_path).unwrap();
        let changed = cache2.changed_files(&content).unwrap();
        assert_eq!(changed.len(), 1);
        assert!(changed[0].ends_with("a.md"));
    }

    // 4. No changes detected when content is identical.
    #[test]
    fn detect_no_changes() {
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "a.md", "same");

        let mut cache = BuildCache::load(&cache_path).unwrap();
        cache.update(&content).unwrap();
        cache.save().unwrap();

        let cache2 = BuildCache::load(&cache_path).unwrap();
        let changed = cache2.changed_files(&content).unwrap();
        assert!(changed.is_empty());
    }

    // 5. New files appear as changed.
    #[test]
    fn new_files_are_changed() {
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "a.md", "hello");

        let mut cache = BuildCache::load(&cache_path).unwrap();
        cache.update(&content).unwrap();
        cache.save().unwrap();

        // Add a new file.
        write_file(&content, "b.md", "world");

        let cache2 = BuildCache::load(&cache_path).unwrap();
        let changed = cache2.changed_files(&content).unwrap();
        assert_eq!(changed.len(), 1);
        assert!(changed[0].ends_with("b.md"));
    }

    // 6. Deleted files are pruned from the map on update.
    #[test]
    fn deleted_files_pruned() {
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "a.md", "keep");
        write_file(&content, "b.md", "delete-me");

        let mut cache = BuildCache::load(&cache_path).unwrap();
        cache.update(&content).unwrap();
        assert_eq!(cache.len(), 2);

        // Delete one file.
        fs::remove_file(content.join("b.md")).unwrap();

        cache.update(&content).unwrap();
        assert_eq!(cache.len(), 1);
    }

    // 7. Save / load round-trip preserves all entries.
    #[test]
    fn save_load_roundtrip() {
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "x.md", "data1");
        write_file(&content, "sub/y.md", "data2");

        let mut cache = BuildCache::new(&cache_path);
        cache.update(&content).unwrap();
        cache.save().unwrap();

        let loaded = BuildCache::load(&cache_path).unwrap();
        assert_eq!(loaded.len(), 2);
    }

    // 8. Empty content directory yields no changed files.
    #[test]
    fn empty_directory() {
        let (_tmp, content, cache_path) = setup();
        let cache = BuildCache::load(&cache_path).unwrap();
        let changed = cache.changed_files(&content).unwrap();
        assert!(changed.is_empty());
    }

    // 9. Non-existent content directory yields no changed files.
    #[test]
    fn nonexistent_directory() {
        let tmp = TempDir::new().unwrap();
        let cache_path = tmp.path().join(".ssg-cache.json");
        let cache = BuildCache::load(&cache_path).unwrap();
        let changed = cache.changed_files(&tmp.path().join("nope")).unwrap();
        assert!(changed.is_empty());
    }

    // 10. Fingerprint is deterministic for the same content.
    #[test]
    fn fingerprint_deterministic() {
        let tmp = TempDir::new().unwrap();
        let path = tmp.path().join("test.txt");
        fs::write(&path, "deterministic").unwrap();

        let h1 = BuildCache::fingerprint(&path).unwrap();
        let h2 = BuildCache::fingerprint(&path).unwrap();
        assert_eq!(h1, h2);
    }

    // 11. Different content produces different fingerprints.
    #[test]
    fn fingerprint_varies_with_content() {
        let tmp = TempDir::new().unwrap();
        let p1 = tmp.path().join("a.txt");
        let p2 = tmp.path().join("b.txt");
        fs::write(&p1, "alpha").unwrap();
        fs::write(&p2, "beta").unwrap();

        let h1 = BuildCache::fingerprint(&p1).unwrap();
        let h2 = BuildCache::fingerprint(&p2).unwrap();
        assert_ne!(h1, h2);
    }

    // 12. Subdirectory files are tracked correctly.
    #[test]
    fn subdirectory_tracking() {
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "posts/2024/hello.md", "hi");
        write_file(&content, "pages/about.md", "about");

        let mut cache = BuildCache::new(&cache_path);
        cache.update(&content).unwrap();
        assert_eq!(cache.len(), 2);

        // Modify nested file.
        write_file(&content, "posts/2024/hello.md", "updated");
        let changed = cache.changed_files(&content).unwrap();
        assert_eq!(changed.len(), 1);
    }

    // 13. Corrupted JSON in cache file returns an error.
    #[test]
    fn build_cache_load_corrupted_json() {
        // Arrange
        let tmp = TempDir::new().unwrap();
        let cache_path = tmp.path().join(".ssg-cache.json");
        fs::write(&cache_path, "{ not valid json !!!").unwrap();

        // Act
        let result = BuildCache::load(&cache_path);

        // Assert — malformed JSON must produce an error
        assert!(result.is_err(), "corrupted JSON should fail to load");
    }

    // 14. Empty directory produces no changes.
    #[test]
    fn build_cache_empty_directory() {
        // Arrange
        let (_tmp, content, cache_path) = setup();
        let mut cache = BuildCache::new(&cache_path);
        cache.update(&content).unwrap();

        // Act
        let changed = cache.changed_files(&content).unwrap();

        // Assert
        assert!(changed.is_empty(), "empty directory should have no changes");
        assert_eq!(cache.len(), 0);
    }

    // 15. File present in cache but deleted from disk is detected on update.
    #[test]
    fn build_cache_file_removed_detected() {
        // Arrange
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "a.md", "keep");
        write_file(&content, "b.md", "remove-me");

        let mut cache = BuildCache::new(&cache_path);
        cache.update(&content).unwrap();
        assert_eq!(cache.len(), 2);

        // Act — delete one file, then update the cache
        fs::remove_file(content.join("b.md")).unwrap();
        cache.update(&content).unwrap();

        // Assert — removed file is no longer in the fingerprint map
        assert_eq!(cache.len(), 1, "deleted file should be pruned from cache");
    }

    // 16. default_path() returns the compile-time constant.
    #[test]
    fn default_path_returns_compile_time_constant() {
        // Covers `BuildCache::default_path`. The function is a
        // trivial static-string accessor but it's part of the
        // public API so we exercise it explicitly.
        assert_eq!(BuildCache::default_path(), DEFAULT_CACHE_FILE);
        assert!(!BuildCache::default_path().is_empty());
    }

    // 17. walk() propagates read_dir errors via with_context.
    #[test]
    fn walk_errors_on_nonexistent_directory() {
        // Covers the `with_context` closure wrapped around
        // `fs::read_dir` in `BuildCache::walk`. We call the walker
        // directly with a path that doesn't exist — `fs::read_dir`
        // returns Err, the closure fires, and its `format!` is
        // evaluated.
        let tmp = TempDir::new().unwrap();
        let missing = tmp.path().join("does-not-exist");
        let mut out = Vec::new();
        let result = BuildCache::walk(tmp.path(), &missing, &mut out);
        assert!(result.is_err(), "walk should Err on missing dir");
        let msg = format!("{:?}", result.unwrap_err());
        assert!(
            msg.contains("cannot read directory"),
            "error should contain with_context message: {msg}"
        );
    }

    // 18. Unchanged files do not appear in the changed list.
    #[test]
    fn build_cache_unchanged_files_not_reported() {
        // Arrange
        let (_tmp, content, cache_path) = setup();
        write_file(&content, "a.md", "stable");
        write_file(&content, "b.md", "also stable");

        let mut cache = BuildCache::new(&cache_path);
        cache.update(&content).unwrap();
        cache.save().unwrap();

        // Act — reload without modifying any files
        let cache2 = BuildCache::load(&cache_path).unwrap();
        let changed = cache2.changed_files(&content).unwrap();

        // Assert — nothing should be reported as changed
        assert!(
            changed.is_empty(),
            "unchanged files must not be in changed list"
        );
    }
}