Skip to main content

ssg/
frontmatter.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Shared frontmatter extraction and `.meta.json` sidecar support.
5//!
6//! This module bridges content files (Markdown with YAML/TOML/JSON
7//! frontmatter) and the plugin pipeline by persisting parsed metadata
8//! as `.meta.json` sidecar files that survive the compilation step.
9
10use anyhow::{Context, Result};
11use std::{
12    collections::HashMap,
13    fs,
14    path::{Path, PathBuf},
15};
16
17use crate::MAX_DIR_DEPTH;
18
19/// Emits `.meta.json` sidecar files for all Markdown content.
20///
21/// Walks `content_dir` for `.md` files, extracts frontmatter via
22/// `frontmatter-gen`, and writes a JSON sidecar alongside each file
23/// in the same relative location under `sidecar_dir`.
24///
25/// These sidecars are consumed by `TeraPlugin`, `JsonLdPlugin`, and
26/// other plugins that need parsed frontmatter after compilation.
27pub fn emit_sidecars(content_dir: &Path, sidecar_dir: &Path) -> Result<usize> {
28    let md_files = collect_md_files(content_dir)?;
29    let mut count = 0;
30
31    for md_path in &md_files {
32        let content = fs::read_to_string(md_path)
33            .with_context(|| format!("Failed to read {}", md_path.display()))?;
34
35        let meta = match frontmatter_gen::extract(&content) {
36            Ok((fm, _body)) => frontmatter_to_json(&fm),
37            Err(_) => continue, // no frontmatter — skip
38        };
39
40        // Compute relative path and write sidecar
41        let rel = md_path.strip_prefix(content_dir).unwrap_or(md_path);
42        let sidecar_path = sidecar_dir.join(rel).with_extension("meta.json");
43
44        if let Some(parent) = sidecar_path.parent() {
45            fs::create_dir_all(parent)?;
46        }
47
48        let json = serde_json::to_string_pretty(&meta)?;
49        fs::write(&sidecar_path, json)?;
50        count += 1;
51    }
52
53    Ok(count)
54}
55
56/// Reads a `.meta.json` sidecar for a given HTML file path.
57///
58/// Looks for `<stem>.meta.json` alongside the HTML file.
59/// Returns `None` if the sidecar does not exist.
60pub fn read_sidecar(
61    html_path: &Path,
62) -> Result<Option<HashMap<String, serde_json::Value>>> {
63    let sidecar = html_path.with_extension("meta.json");
64    if !sidecar.exists() {
65        return Ok(None);
66    }
67
68    let content = fs::read_to_string(&sidecar).with_context(|| {
69        format!("Failed to read sidecar {}", sidecar.display())
70    })?;
71    let meta: HashMap<String, serde_json::Value> =
72        serde_json::from_str(&content)?;
73    Ok(Some(meta))
74}
75
76/// Reads a `.meta.json` sidecar matching an HTML path in the site dir,
77/// looking up by the corresponding content-relative path.
78pub fn read_sidecar_for_html(
79    html_path: &Path,
80    site_dir: &Path,
81    sidecar_dir: &Path,
82) -> Result<Option<HashMap<String, serde_json::Value>>> {
83    let rel = html_path.strip_prefix(site_dir).unwrap_or(html_path);
84    let sidecar_path = sidecar_dir.join(rel).with_extension("meta.json");
85    if !sidecar_path.exists() {
86        // Try .html → .md mapping
87        let md_sidecar = sidecar_dir.join(rel.with_extension("md.meta.json"));
88        if md_sidecar.exists() {
89            return read_sidecar(&md_sidecar.with_extension(""));
90        }
91        return Ok(None);
92    }
93    read_sidecar(&sidecar_path.with_extension("").with_extension(""))
94}
95
96/// Converts a `frontmatter_gen::Frontmatter` to a JSON-compatible `HashMap`.
97pub(crate) fn frontmatter_to_json(
98    fm: &frontmatter_gen::Frontmatter,
99) -> HashMap<String, serde_json::Value> {
100    let mut map = HashMap::new();
101    for (key, value) in &fm.0 {
102        let _ = map.insert(key.clone(), fm_value_to_json(value));
103    }
104    map
105}
106
107/// Converts a single frontmatter Value to `serde_json::Value`.
108fn fm_value_to_json(value: &frontmatter_gen::Value) -> serde_json::Value {
109    match value {
110        frontmatter_gen::Value::String(s) => {
111            serde_json::Value::String(s.clone())
112        }
113        frontmatter_gen::Value::Number(n) => {
114            serde_json::json!(n)
115        }
116        frontmatter_gen::Value::Boolean(b) => serde_json::Value::Bool(*b),
117        frontmatter_gen::Value::Array(arr) => {
118            serde_json::Value::Array(arr.iter().map(fm_value_to_json).collect())
119        }
120        frontmatter_gen::Value::Object(obj) => {
121            let map: serde_json::Map<String, serde_json::Value> = obj
122                .iter()
123                .map(|(k, v)| (k.clone(), fm_value_to_json(v)))
124                .collect();
125            serde_json::Value::Object(map)
126        }
127        frontmatter_gen::Value::Null => serde_json::Value::Null,
128        // Fallback for tagged values
129        frontmatter_gen::Value::Tagged(..) => {
130            serde_json::Value::String(format!("{value:?}"))
131        }
132    }
133}
134
135/// Recursively collects `.md` files from a directory, bounded by depth.
136fn collect_md_files(dir: &Path) -> Result<Vec<PathBuf>> {
137    crate::walk::walk_files_bounded_depth(dir, "md", MAX_DIR_DEPTH)
138}
139
#[cfg(test)]
// Tests intentionally panic on setup failures; unwrap/expect keep them terse.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::{tempdir, TempDir};

    // -------------------------------------------------------------------
    // Test fixtures
    // -------------------------------------------------------------------

    /// Builds a `content/` + `sidecars/` layout under a tempdir.
    ///
    /// Only `content/` is created on disk; `sidecars/` is left for the
    /// code under test to create.
    fn make_layout() -> (TempDir, PathBuf, PathBuf) {
        crate::test_support::init_logger();
        let dir = tempdir().expect("tempdir");
        let content = dir.path().join("content");
        let sidecars = dir.path().join("sidecars");
        fs::create_dir_all(&content).expect("mkdir content");
        (dir, content, sidecars)
    }

    // -------------------------------------------------------------------
    // emit_sidecars — happy path, skip path, subdirectory recursion
    // -------------------------------------------------------------------

    #[test]
    fn emit_sidecars_writes_json_for_file_with_frontmatter() {
        let (_tmp, content, sidecars) = make_layout();
        let md = "---\ntitle: Hello World\ndate: 2026-01-01\n---\n# Content\n";
        fs::write(content.join("index.md"), md).unwrap();

        let count = emit_sidecars(&content, &sidecars).unwrap();
        assert_eq!(count, 1);
        assert!(sidecars.join("index.meta.json").exists());

        let body =
            fs::read_to_string(sidecars.join("index.meta.json")).unwrap();
        let parsed: HashMap<String, serde_json::Value> =
            serde_json::from_str(&body).unwrap();
        assert!(parsed.contains_key("title"));
    }

    #[test]
    fn emit_sidecars_skips_files_without_frontmatter() {
        let (_tmp, content, sidecars) = make_layout();
        fs::write(content.join("plain.md"), "No frontmatter here.").unwrap();

        let count = emit_sidecars(&content, &sidecars).unwrap();
        assert_eq!(count, 0);
    }

    #[test]
    fn emit_sidecars_creates_nested_output_directories() {
        // `emit_sidecars` must create the mirrored subdirectory tree
        // under the sidecar root before writing.
        let (_tmp, content, sidecars) = make_layout();
        let nested = content.join("blog").join("2026");
        fs::create_dir_all(&nested).unwrap();
        fs::write(nested.join("post.md"), "---\ntitle: Nested\n---\nbody")
            .unwrap();

        let count = emit_sidecars(&content, &sidecars).unwrap();
        assert_eq!(count, 1);
        assert!(sidecars
            .join("blog")
            .join("2026")
            .join("post.meta.json")
            .exists());
    }

    #[test]
    fn emit_sidecars_counts_only_files_with_frontmatter() {
        let (_tmp, content, sidecars) = make_layout();
        fs::write(content.join("a.md"), "---\ntitle: A\n---\nbody").unwrap();
        fs::write(content.join("b.md"), "no frontmatter").unwrap();
        fs::write(content.join("c.md"), "---\ntitle: C\n---\nbody").unwrap();

        let count = emit_sidecars(&content, &sidecars).unwrap();
        assert_eq!(count, 2);
    }

    #[test]
    fn emit_sidecars_missing_content_dir_returns_ok_with_zero() {
        let dir = tempdir().expect("tempdir");
        let missing = dir.path().join("does-not-exist");
        let sidecars = dir.path().join("sidecars");
        let count = emit_sidecars(&missing, &sidecars).unwrap();
        assert_eq!(count, 0);
    }

    // -------------------------------------------------------------------
    // read_sidecar — happy + missing + invalid JSON
    // -------------------------------------------------------------------

    #[test]
    fn read_sidecar_missing_file_returns_none() {
        // Covers the `!sidecar.exists()` early return.
        let dir = tempdir().expect("tempdir");
        let result = read_sidecar(&dir.path().join("ghost.html")).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn read_sidecar_existing_sidecar_returns_parsed_map() {
        let dir = tempdir().expect("tempdir");
        let html = dir.path().join("post.html");
        let sidecar = dir.path().join("post.meta.json");
        fs::write(&html, "").unwrap();
        fs::write(&sidecar, r#"{"title": "T", "tag": "rust"}"#).unwrap();

        let result = read_sidecar(&html).unwrap().unwrap();
        assert_eq!(result.get("title").unwrap().as_str(), Some("T"));
        assert_eq!(result.get("tag").unwrap().as_str(), Some("rust"));
    }

    #[test]
    fn read_sidecar_invalid_json_returns_err() {
        // Guards the propagation of the `serde_json::from_str` error.
        let dir = tempdir().expect("tempdir");
        let html = dir.path().join("post.html");
        let sidecar = dir.path().join("post.meta.json");
        fs::write(&html, "").unwrap();
        fs::write(&sidecar, "{not valid json").unwrap();

        assert!(read_sidecar(&html).is_err());
    }

    // -------------------------------------------------------------------
    // read_sidecar_for_html — the three branches (direct, .md fallback, none)
    // -------------------------------------------------------------------

    #[test]
    fn read_sidecar_for_html_direct_match_returns_parsed() {
        // Covers the direct `<stem>.meta.json` lookup.
        let dir = tempdir().expect("tempdir");
        let site = dir.path().join("site");
        let sidecars = dir.path().join("sidecars");
        fs::create_dir_all(&site).unwrap();
        fs::create_dir_all(&sidecars).unwrap();

        let html = site.join("post.html");
        fs::write(&html, "").unwrap();
        fs::write(sidecars.join("post.meta.json"), r#"{"title": "Direct"}"#)
            .unwrap();

        let result = read_sidecar_for_html(&html, &site, &sidecars)
            .unwrap()
            .unwrap();
        assert_eq!(result.get("title").unwrap().as_str(), Some("Direct"));
    }

    #[test]
    fn read_sidecar_for_html_md_fallback_returns_parsed() {
        // `rel.with_extension("md.meta.json")` replaces the `.html`
        // extension, so for `post.html` the fallback candidate is
        // `post.md.meta.json`. Plant exactly that file (and no
        // `post.meta.json`) and require that its contents come back.
        let dir = tempdir().expect("tempdir");
        let site = dir.path().join("site");
        let sidecars = dir.path().join("sidecars");
        fs::create_dir_all(&site).unwrap();
        fs::create_dir_all(&sidecars).unwrap();

        let html = site.join("post.html");
        fs::write(&html, "").unwrap();
        fs::write(
            sidecars.join("post.md.meta.json"),
            r#"{"title": "Fallback"}"#,
        )
        .unwrap();

        let result = read_sidecar_for_html(&html, &site, &sidecars)
            .unwrap()
            .expect("fallback sidecar should be found");
        assert_eq!(result.get("title").unwrap().as_str(), Some("Fallback"));
    }

    #[test]
    fn read_sidecar_for_html_no_match_returns_none() {
        // Neither candidate exists, so the lookup yields `Ok(None)`.
        let dir = tempdir().expect("tempdir");
        let site = dir.path().join("site");
        let sidecars = dir.path().join("sidecars");
        fs::create_dir_all(&site).unwrap();
        fs::create_dir_all(&sidecars).unwrap();

        let html = site.join("ghost.html");
        fs::write(&html, "").unwrap();

        let result = read_sidecar_for_html(&html, &site, &sidecars).unwrap();
        assert!(result.is_none());
    }

    // -------------------------------------------------------------------
    // fm_value_to_json / frontmatter_to_json — every Value variant
    // -------------------------------------------------------------------

    #[test]
    fn fm_value_to_json_string_variant() {
        let v = frontmatter_gen::Value::String("hello".to_string());
        let json = fm_value_to_json(&v);
        assert_eq!(json.as_str(), Some("hello"));
    }

    #[test]
    fn fm_value_to_json_number_variant() {
        let v = frontmatter_gen::Value::Number(42.0);
        let json = fm_value_to_json(&v);
        assert!(json.is_number());
    }

    #[test]
    fn fm_value_to_json_boolean_variant() {
        assert_eq!(
            fm_value_to_json(&frontmatter_gen::Value::Boolean(true)),
            serde_json::Value::Bool(true)
        );
        assert_eq!(
            fm_value_to_json(&frontmatter_gen::Value::Boolean(false)),
            serde_json::Value::Bool(false)
        );
    }

    #[test]
    fn fm_value_to_json_null_variant() {
        let json = fm_value_to_json(&frontmatter_gen::Value::Null);
        assert_eq!(json, serde_json::Value::Null);
    }

    #[test]
    fn fm_value_to_json_array_variant_recurses() {
        let arr = frontmatter_gen::Value::Array(vec![
            frontmatter_gen::Value::String("a".to_string()),
            frontmatter_gen::Value::String("b".to_string()),
        ]);
        let json = fm_value_to_json(&arr);
        let out = json.as_array().expect("array");
        assert_eq!(out.len(), 2);
        assert_eq!(out[0].as_str(), Some("a"));
        assert_eq!(out[1].as_str(), Some("b"));
    }

    #[test]
    fn fm_value_to_json_object_variant_recurses_directly() {
        // Construct a `Value::Object(Box<Frontmatter>)` directly —
        // `Frontmatter` is a tuple struct wrapping `HashMap<String, Value>`,
        // so we can build one by hand to exercise the object arm.
        let mut inner = HashMap::new();
        let _ = inner.insert(
            "k".to_string(),
            frontmatter_gen::Value::String("v".to_string()),
        );
        let fm = Box::new(frontmatter_gen::Frontmatter(inner));
        let val = frontmatter_gen::Value::Object(fm);
        let json = fm_value_to_json(&val);
        let obj = json.as_object().expect("serializes to object");
        assert_eq!(obj.get("k").and_then(|v| v.as_str()), Some("v"));
    }

    #[test]
    fn fm_value_to_json_tagged_variant_hits_fallback_arm() {
        // Constructs a `Value::Tagged(String, Box<Value>)`. The
        // `Tagged(..)` arm of fm_value_to_json has no structured JSON
        // mapping and serializes the value as its debug string.
        let tagged = frontmatter_gen::Value::Tagged(
            "mytag".to_string(),
            Box::new(frontmatter_gen::Value::String("x".to_string())),
        );
        let json = fm_value_to_json(&tagged);
        let s = json.as_str().expect("fallback serializes to string");
        assert!(s.contains("Tagged"));
    }

    #[test]
    fn frontmatter_to_json_preserves_all_keys() {
        // Build a Frontmatter via the public parser path so we hit
        // the real internal representation.
        let md = "---\ntitle: T\ncount: 5\ndraft: true\n---\nbody";
        let (fm, _) = frontmatter_gen::extract(md).unwrap();
        let json = frontmatter_to_json(&fm);
        assert!(json.contains_key("title"));
        assert!(json.contains_key("count"));
        assert!(json.contains_key("draft"));
    }

    // -------------------------------------------------------------------
    // collect_md_files — recursion, filtering, depth guard
    // -------------------------------------------------------------------

    #[test]
    fn collect_md_files_filters_non_md_extensions() {
        let dir = tempdir().expect("tempdir");
        fs::write(dir.path().join("a.md"), "# A").unwrap();
        fs::write(dir.path().join("b.txt"), "B").unwrap();
        fs::write(dir.path().join("c.html"), "C").unwrap();

        let files = collect_md_files(dir.path()).unwrap();
        assert_eq!(files.len(), 1);
    }

    #[test]
    fn collect_md_files_recurses_into_subdirectories() {
        let dir = tempdir().expect("tempdir");
        let sub = dir.path().join("sub");
        fs::create_dir(&sub).unwrap();
        fs::write(dir.path().join("a.md"), "# A").unwrap();
        fs::write(sub.join("c.md"), "# C").unwrap();

        let files = collect_md_files(dir.path()).unwrap();
        assert_eq!(files.len(), 2);
    }

    #[test]
    fn collect_md_files_returns_empty_for_missing_directory() {
        // A non-existent root must yield an empty list, not an error.
        let dir = tempdir().expect("tempdir");
        let files = collect_md_files(&dir.path().join("missing")).unwrap();
        assert!(files.is_empty());
    }

    #[test]
    fn collect_md_files_results_are_sorted() {
        // The walker is expected to return paths in sorted order.
        let dir = tempdir().expect("tempdir");
        for name in ["zebra.md", "apple.md", "mango.md"] {
            fs::write(dir.path().join(name), "").unwrap();
        }
        let files = collect_md_files(dir.path()).unwrap();
        let names: Vec<_> = files
            .iter()
            .map(|p| p.file_name().unwrap().to_str().unwrap())
            .collect();
        assert_eq!(names, vec!["apple.md", "mango.md", "zebra.md"]);
    }

    #[test]
    fn collect_md_files_respects_max_dir_depth_guard() {
        // Build a tree MAX_DIR_DEPTH+2 deep and verify files past the
        // depth limit are silently skipped rather than causing an error.
        let dir = tempdir().expect("tempdir");
        let mut current = dir.path().to_path_buf();
        for i in 0..MAX_DIR_DEPTH + 2 {
            current = current.join(format!("d{i}"));
            fs::create_dir_all(&current).unwrap();
            fs::write(current.join("post.md"), "").unwrap();
        }

        let files = collect_md_files(dir.path()).unwrap();
        // We should have at most MAX_DIR_DEPTH+1 files (depths 0..=MAX).
        assert!(
            files.len() <= MAX_DIR_DEPTH + 1,
            "depth guard should have stopped descent"
        );
    }
}