Skip to main content

ssg/
collections.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Typed content collection API (issue #456).
5//!
6//! Mirrors the ergonomics of Astro's `getCollection` / `getEntry`
7//! and Eleventy's collection helpers, but with **compile-time type
8//! safety** via serde. Authors define a struct that derives
9//! `serde::Deserialize`, then load every Markdown file under a
10//! directory as `Vec<Entry<T>>` with one call.
11//!
12//! # Quick start
13//!
14//! ```no_run
15//! use serde::Deserialize;
16//! use ssg::collections::{get_collection, Entry};
17//!
18//! #[derive(Debug, Deserialize)]
19//! struct BlogPost {
20//!     title: String,
21//!     date: String,
22//!     description: Option<String>,
23//!     #[serde(default)]
24//!     tags: Vec<String>,
25//! }
26//!
27//! # fn main() -> anyhow::Result<()> {
28//! let posts: Vec<Entry<BlogPost>> =
29//!     get_collection("content/blog")?;
30//!
31//! for post in posts {
32//!     println!("{} ({})", post.data.title, post.slug);
33//! }
34//! # Ok(())
35//! # }
36//! ```
37//!
38//! # Why typed?
39//!
40//! Hand-rolling frontmatter access via `serde_yml::Value` or string
41//! lookups produces stringly-typed code that fails at runtime when a
42//! field is renamed or its type changes. The typed API surfaces the
43//! mismatch as a compile error or a clean `Result::Err` at load
44//! time, with the file path in the error chain.
45//!
46//! # Loading semantics
47//!
48//! - **Walks recursively** under the given directory.
49//! - **Markdown only** (`.md`, `.markdown`). Other files are skipped.
50//! - **Skips files without frontmatter** silently — they're treated
51//!   as plain pages outside the collection.
//! - **Returns parse errors with context**: each error carries the
//!   path of the file that failed (relative or absolute, matching
//!   the directory argument that was passed in).
54//! - **Slug derivation**: the slug is the file's `stem` (filename
55//!   without extension). `index.md` files in subdirectories use the
56//!   subdirectory name as the slug.
57//! - **Deterministic ordering**: entries are returned sorted by
58//!   slug so consumers that hash the result (e.g. for golden tests
59//!   or perf benchmarks) get stable output.
60//!
61//! # Single-entry access
62//!
63//! [`get_entry`] loads exactly one file by slug, returning
64//! `Ok(None)` if no matching `.md` is found. Use this when a page
65//! references another by its known slug (sidebar layouts, related
66//! posts).
67
68use anyhow::{Context, Result};
69use serde::de::DeserializeOwned;
70use std::path::{Path, PathBuf};
71use std::{fs, io};
72
/// A single item of a content collection: typed frontmatter plus the
/// Markdown that follows it.
///
/// - `data` is the frontmatter deserialised into the caller's `T`.
/// - `body` is the Markdown text left once the frontmatter block has
///   been removed.
/// - `slug` and `path` identify the source file, so callers can build
///   URLs and breadcrumbs without re-parsing the filename.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Entry<T> {
    /// Frontmatter deserialised into the caller-supplied type.
    pub data: T,
    /// Raw Markdown body (frontmatter delimiters stripped).
    pub body: String,
    /// URL-style slug derived from the file name, or from the
    /// containing directory name for `index` files.
    pub slug: String,
    /// Path of the source file as discovered by the directory walk.
    /// It is rooted at the directory handed to the loader, so it is
    /// absolute only when that directory was.
    pub path: PathBuf,
}
91
92/// Loads every Markdown file under `dir` whose frontmatter matches
93/// `T`. Returns entries sorted by slug.
94///
95/// # Errors
96///
97/// - Returns the first I/O error encountered while walking the
98///   directory.
99/// - Returns the first frontmatter deserialisation error, with the
100///   failing path in the error chain (`anyhow::Error::context`).
101///
102/// Files without a frontmatter delimiter are silently skipped — the
103/// collection is for *structured* content, and pages without
104/// frontmatter aren't part of the schema.
105///
106/// # Determinism
107///
108/// Output is sorted by `Entry::slug` (lexicographic). Callers that
109/// hash collections for golden tests or fingerprinting benefit
110/// directly.
111pub fn get_collection<T: DeserializeOwned>(
112    dir: impl AsRef<Path>,
113) -> Result<Vec<Entry<T>>> {
114    let dir = dir.as_ref();
115    let mut files = Vec::new();
116    walk_markdown(dir, &mut files)?;
117    files.sort();
118
119    let mut out = Vec::with_capacity(files.len());
120    for path in files {
121        let entry = load_entry::<T>(&path)?;
122        if let Some(e) = entry {
123            out.push(e);
124        }
125    }
126
127    out.sort_by(|a, b| a.slug.cmp(&b.slug));
128    Ok(out)
129}
130
131/// Loads a single entry from `dir` whose slug matches `slug`.
132///
133/// Returns `Ok(None)` when no Markdown file with that slug exists.
134/// Use [`get_collection`] when you need every entry or when you
135/// don't know the slug ahead of time.
136///
137/// # Errors
138///
139/// Same as [`get_collection`].
140pub fn get_entry<T: DeserializeOwned>(
141    dir: impl AsRef<Path>,
142    slug: &str,
143) -> Result<Option<Entry<T>>> {
144    let dir = dir.as_ref();
145    let mut files = Vec::new();
146    walk_markdown(dir, &mut files)?;
147
148    for path in files {
149        let candidate = derive_slug(&path, dir);
150        if candidate == slug {
151            return load_entry::<T>(&path);
152        }
153    }
154    Ok(None)
155}
156
/// Recursively appends every Markdown file below `dir` to `out`.
///
/// A file counts as Markdown when its extension is `md` or `markdown`,
/// compared ASCII case-insensitively. When `dir` is not a directory
/// (including when it does not exist) nothing is appended and `Ok(())`
/// is returned. Entries are pushed in `read_dir` order; callers sort
/// when they need a stable order.
///
/// # Errors
///
/// Propagates the first error from `fs::read_dir` or from reading one
/// of its entries.
fn walk_markdown(dir: &Path, out: &mut Vec<PathBuf>) -> io::Result<()> {
    if dir.is_dir() {
        for item in fs::read_dir(dir)? {
            let child = item?.path();
            if child.is_dir() {
                walk_markdown(&child, out)?;
                continue;
            }
            let is_markdown = match child.extension() {
                Some(ext) => {
                    ext.eq_ignore_ascii_case("md")
                        || ext.eq_ignore_ascii_case("markdown")
                }
                None => false,
            };
            if is_markdown {
                out.push(child);
            }
        }
    }
    Ok(())
}
174
175fn load_entry<T: DeserializeOwned>(path: &Path) -> Result<Option<Entry<T>>> {
176    let raw = fs::read_to_string(path)
177        .with_context(|| format!("read {}", path.display()))?;
178    let Ok((fm, body)) = frontmatter_gen::extract(&raw) else {
179        return Ok(None); // no frontmatter — not part of collection
180    };
181    let json_map = crate::frontmatter::frontmatter_to_json(&fm);
182    let json_value = serde_json::Value::Object(json_map.into_iter().collect());
183    let data: T = serde_json::from_value(json_value).with_context(|| {
184        format!("deserialize frontmatter from {}", path.display())
185    })?;
186    let dir_anchor = path.parent().unwrap_or(path);
187    Ok(Some(Entry {
188        data,
189        body: body.to_string(),
190        slug: derive_slug(path, dir_anchor),
191        path: path.to_path_buf(),
192    }))
193}
194
/// Derives the URL-style slug from a file path:
///
/// - `posts/hello-world.md` → `hello-world` (file stem)
/// - `posts/about/index.md` → `about` (name of the containing directory)
/// - `index.md` (no named parent directory) → `index`
/// - a path without a file stem → empty string
///
/// NOTE(review): `_dir` is currently ignored, so an `index.md` placed
/// directly in the collection root resolves to the root directory's
/// own name (`posts/index.md` → `posts`), not `index`. Honouring
/// `_dir` would require every caller to pass the collection root
/// rather than the file's parent directory.
fn derive_slug(path: &Path, _dir: &Path) -> String {
    let stem = match path.file_stem() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::new(),
    };
    if stem != "index" {
        return stem;
    }
    // `index` files take the name of their containing directory when
    // that directory has one.
    match path.parent().and_then(Path::file_name) {
        Some(parent_name) => parent_name.to_string_lossy().into_owned(),
        None => stem,
    }
}
212
// Unit tests for slug derivation and for loading collections from a
// temporary directory on disk.
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Deserialize;
    use tempfile::tempdir;

    /// Frontmatter schema used by the tests: two required strings and
    /// an optional tag list that defaults to empty.
    #[derive(Debug, Deserialize, PartialEq, Eq)]
    struct Post {
        title: String,
        date: String,
        #[serde(default)]
        tags: Vec<String>,
    }

    /// Writes `body` to `dir/name`, creating any missing parent
    /// directories first (so `name` may contain subdirectories).
    fn write_post(dir: &Path, name: &str, body: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(path, body).unwrap();
    }

    // A regular file's slug is its file stem.
    #[test]
    fn derive_slug_uses_file_stem() {
        let p = PathBuf::from("posts/hello-world.md");
        assert_eq!(derive_slug(&p, Path::new("posts")), "hello-world");
    }

    // An `index.md` in a subdirectory takes the subdirectory's name.
    #[test]
    fn derive_slug_index_uses_parent_dir() {
        let p = PathBuf::from("posts/about/index.md");
        assert_eq!(derive_slug(&p, Path::new("posts")), "about");
    }

    // Two files with valid frontmatter load as typed entries, in slug order.
    #[test]
    fn get_collection_loads_typed_entries() {
        let dir = tempdir().unwrap();
        // Inline YAML list — frontmatter-gen 0.0.5 doesn't support
        // the multi-line `- item` form for nested lists. Inline form
        // (`[rust, ssg]`) is the canonical short syntax it accepts.
        write_post(
            dir.path(),
            "first.md",
            "---\ntitle: First\ndate: 2026-01-01\ntags: [rust, ssg]\n---\nBody one.\n",
        );
        write_post(
            dir.path(),
            "second.md",
            "---\ntitle: Second\ndate: 2026-01-02\n---\nBody two.\n",
        );

        let posts: Vec<Entry<Post>> = get_collection(dir.path()).unwrap();
        assert_eq!(posts.len(), 2);
        // Sorted by slug.
        assert_eq!(posts[0].slug, "first");
        assert_eq!(posts[1].slug, "second");
        assert_eq!(posts[0].data.title, "First");
        assert!(posts[0].body.starts_with("Body one"));
    }

    // A Markdown file with no frontmatter block is left out, not an error.
    #[test]
    fn get_collection_skips_files_without_frontmatter() {
        let dir = tempdir().unwrap();
        write_post(dir.path(), "naked.md", "# No frontmatter\n");
        write_post(
            dir.path(),
            "ok.md",
            "---\ntitle: x\ndate: 2026-01-01\n---\n",
        );
        let posts: Vec<Entry<Post>> = get_collection(dir.path()).unwrap();
        assert_eq!(posts.len(), 1);
        assert_eq!(posts[0].slug, "ok");
    }

    // Files in nested directories are picked up alongside top-level ones.
    #[test]
    fn get_collection_recurses_into_subdirectories() {
        let dir = tempdir().unwrap();
        write_post(
            dir.path(),
            "a.md",
            "---\ntitle: A\ndate: 2026-01-01\n---\n",
        );
        write_post(
            dir.path(),
            "nested/b.md",
            "---\ntitle: B\ndate: 2026-01-02\n---\n",
        );
        let posts: Vec<Entry<Post>> = get_collection(dir.path()).unwrap();
        assert_eq!(posts.len(), 2);
    }

    // A frontmatter type mismatch surfaces as an error whose chain
    // names the offending file.
    #[test]
    fn get_collection_returns_error_with_path_context_on_bad_yaml() {
        let dir = tempdir().unwrap();
        write_post(
            dir.path(),
            "broken.md",
            "---\ntitle: 12\ndate: 2026-01-01\n---\n",
        );
        // Per the original author's note, a scalar such as `title: 12`
        // may still deserialise into `String` through coercion, so
        // `broken.md` alone is not relied on to fail. The file below
        // puts a list where a `String` is required, which is the
        // intended type mismatch.
        write_post(
            dir.path(),
            "bad.md",
            "---\ntitle:\n  - a list\ndate: 2026-01-01\n---\n",
        );
        let err = get_collection::<Post>(dir.path()).unwrap_err();
        // Flatten the whole error chain so the path can be found at
        // any level of context.
        let chain: String = err
            .chain()
            .map(|c| c.to_string())
            .collect::<Vec<_>>()
            .join("\n");
        // Either file name is accepted, whichever one produced the error.
        assert!(
            chain.contains("bad.md") || chain.contains("broken.md"),
            "expected file path in error chain, got: {chain}"
        );
    }

    // Looking up an existing slug returns the parsed entry.
    #[test]
    fn get_entry_finds_by_slug() {
        let dir = tempdir().unwrap();
        write_post(
            dir.path(),
            "hello.md",
            "---\ntitle: H\ndate: 2026-01-01\n---\nbody\n",
        );
        let post: Option<Entry<Post>> = get_entry(dir.path(), "hello").unwrap();
        assert!(post.is_some());
        assert_eq!(post.unwrap().data.title, "H");
    }

    // Looking up a slug that no file produces yields `Ok(None)`.
    #[test]
    fn get_entry_returns_none_for_unknown_slug() {
        let dir = tempdir().unwrap();
        write_post(
            dir.path(),
            "exists.md",
            "---\ntitle: E\ndate: 2026-01-01\n---\n",
        );
        let post: Option<Entry<Post>> =
            get_entry(dir.path(), "missing").unwrap();
        assert!(post.is_none());
    }

    // An existing but empty directory is a valid, empty collection.
    #[test]
    fn get_collection_empty_dir_returns_empty_vec() {
        let dir = tempdir().unwrap();
        let posts: Vec<Entry<Post>> = get_collection(dir.path()).unwrap();
        assert!(posts.is_empty());
    }

    // A directory that does not exist is treated as empty, not as an error.
    #[test]
    fn get_collection_missing_dir_returns_empty_vec() {
        let posts: Vec<Entry<Post>> =
            get_collection("/nonexistent/path/here").unwrap();
        assert!(posts.is_empty());
    }
}