Skip to main content

ssg/
markdown_ext.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! GitHub Flavored Markdown (GFM) extensions plugin.
5//!
6//! Pre-processes Markdown content in the `before_compile` phase to add
7//! support for GFM features that the upstream renderer does not handle:
8//!
9//! - **Tables** — `| col | col |` blocks with a `|---|---|` separator row.
10//! - **Strikethrough** — `~~text~~` becomes `<del>text</del>`.
11//! - **Task lists** — `- [ ] item` and `- [x] done` become checkbox lists.
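//! - **Footnotes** — `[^id]` references with `[^id]:` definitions
//!   (planned; this module does not transform them yet, so they are
//!   passed through to the downstream renderer unchanged).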
13//!
14//! ## How it works
15//!
16//! For each `.md` file under `content_dir`, the plugin:
17//! 1. Splits the YAML/TOML frontmatter from the body so it stays untouched.
//! 2. Walks the body line-by-line, tracking fenced code blocks so GFM
//!    syntax inside `` ``` `` or `~~~` fences is preserved literally.
20//! 3. Detects GFM-specific blocks (tables, task lists) and renders **only
21//!    those blocks** through `pulldown-cmark` with the matching options
22//!    enabled, substituting the rendered HTML back into the source.
23//! 4. Applies an inline strikethrough transform to remaining text.
24//!
25//! Standard markdown renderers pass block-level raw HTML through
26//! unchanged, so the substituted HTML composes cleanly with whatever
27//! renderer staticdatagen runs afterwards.
28//!
29//! ## Example
30//!
31//! ```rust
32//! use ssg::plugin::PluginManager;
33//! use ssg::markdown_ext::MarkdownExtPlugin;
34//!
35//! let mut pm = PluginManager::new();
36//! pm.register(MarkdownExtPlugin);
37//! ```
38
39use crate::plugin::{Plugin, PluginContext};
40use crate::walk::walk_files_bounded_depth;
41use crate::MAX_DIR_DEPTH;
42use anyhow::{Context, Result};
43use pulldown_cmark::{html as cmark_html, Options, Parser};
44use std::fs;
45
/// GFM pre-processing plugin: expands GitHub Flavored Markdown extensions
/// directly in the Markdown source files.
///
/// The work happens in the `before_compile` hook. The
/// [module-level docs](self) describe which constructs are expanded and
/// how the transformation is carried out.
#[derive(Clone, Copy, Debug)]
#[allow(clippy::module_name_repetitions)]
pub struct MarkdownExtPlugin;
53
54impl Plugin for MarkdownExtPlugin {
55    fn name(&self) -> &'static str {
56        "markdown-ext"
57    }
58
59    fn before_compile(&self, ctx: &PluginContext) -> Result<()> {
60        if !ctx.content_dir.exists() {
61            return Ok(());
62        }
63
64        let files =
65            walk_files_bounded_depth(&ctx.content_dir, "md", MAX_DIR_DEPTH)
66                .with_context(|| {
67                    format!(
68                        "Failed to walk content dir {}",
69                        ctx.content_dir.display()
70                    )
71                })?;
72
73        let mut transformed = 0usize;
74        for path in &files {
75            fail_point!("markdown_ext::read", |_| {
76                anyhow::bail!("injected: markdown_ext::read")
77            });
78            let raw = fs::read_to_string(path).with_context(|| {
79                format!("Failed to read {}", path.display())
80            })?;
81
82            let new = expand_gfm(&raw);
83            if new != raw {
84                fail_point!("markdown_ext::write", |_| {
85                    anyhow::bail!("injected: markdown_ext::write")
86                });
87                fs::write(path, &new).with_context(|| {
88                    format!("Failed to write {}", path.display())
89                })?;
90                transformed += 1;
91            }
92        }
93
94        if transformed > 0 {
95            log::info!("[markdown-ext] Transformed {transformed} file(s)");
96        }
97        Ok(())
98    }
99}
100
/// Splits leading frontmatter from `input`.
///
/// Frontmatter is recognised when the very first line is a fence
/// (`---` for YAML or `+++` for TOML) and a later line consists of the
/// same fence. Trailing whitespace on a fence line (including the `\r`
/// of a CRLF line ending) is tolerated, and the closing fence may be the
/// last line of the file without a trailing newline.
///
/// Returns `(frontmatter, body)`, where `frontmatter` includes both
/// fence lines and the closing fence's line terminator. If no complete
/// frontmatter block is present the frontmatter slice is empty and the
/// entire input is the body.
fn split_frontmatter(input: &str) -> (&str, &str) {
    let mut lines = input.split_inclusive('\n');

    let Some(opening) = lines.next() else {
        return ("", input);
    };
    let fence = opening.trim_end();
    // The opening fence must be a complete line of its own; a bare
    // `---` with nothing after it is not frontmatter.
    if !opening.ends_with('\n') || !matches!(fence, "---" | "+++") {
        return ("", input);
    }

    // Track the byte offset just past each line so the split always
    // lands on a line boundary (never in the middle of a line such as
    // `----` or `---foo`).
    let mut offset = opening.len();
    for line in lines {
        offset += line.len();
        if line.trim_end() == fence {
            return input.split_at(offset);
        }
    }

    // Unterminated block: treat everything as body.
    ("", input)
}
119
120/// Expands all GFM constructs in `input`, returning a new string.
121///
122/// If no GFM features are present, returns the input unchanged
123/// (modulo no allocation when avoidable).
124#[must_use]
125pub fn expand_gfm(input: &str) -> String {
126    let (frontmatter, body) = split_frontmatter(input);
127    if !needs_expansion(body) {
128        return input.to_string();
129    }
130
131    let mut out = String::with_capacity(input.len() + 256);
132    out.push_str(frontmatter);
133
134    let lines: Vec<&str> = body.lines().collect();
135    let mut i = 0usize;
136    let mut in_fence = false;
137    let mut fence_marker: Option<&str> = None;
138
139    while i < lines.len() {
140        let line = lines[i];
141
142        if let Some(marker) = detect_fence(line) {
143            update_fence_state(&mut in_fence, &mut fence_marker, marker, line);
144            out.push_str(line);
145            out.push('\n');
146            i += 1;
147            continue;
148        }
149
150        if in_fence {
151            out.push_str(line);
152            out.push('\n');
153            i += 1;
154            continue;
155        }
156
157        i = process_gfm_line(&lines, i, &mut out);
158    }
159
160    if !body.ends_with('\n') && out.ends_with('\n') {
161        let _ = out.pop();
162    }
163
164    out
165}
166
/// Advances the fenced-code-block tracking state for a fence line.
///
/// Outside a fence, the line opens one and `marker` is remembered.
/// Inside a fence, the line closes it only when (after leading
/// whitespace) it starts with the remembered opening marker; any other
/// fence line leaves the state untouched.
fn update_fence_state<'a>(
    in_fence: &mut bool,
    fence_marker: &mut Option<&'a str>,
    marker: &'a str,
    line: &str,
) {
    if *in_fence {
        let closes_current = match *fence_marker {
            Some(opening) => line.trim_start().starts_with(opening),
            None => false,
        };
        if closes_current {
            *fence_marker = None;
            *in_fence = false;
        }
    } else {
        *fence_marker = Some(marker);
        *in_fence = true;
    }
}
182
183/// Processes a single non-fenced line, detecting tables, task lists, or
184/// applying strikethrough. Returns the new line index.
185fn process_gfm_line(lines: &[&str], i: usize, out: &mut String) -> usize {
186    let line = lines[i];
187
188    if i + 1 < lines.len() && is_table_header(line, lines[i + 1]) {
189        let end = find_table_end(lines, i);
190        let block = lines[i..end].join("\n");
191        out.push_str(&render_with_options(&block, Options::ENABLE_TABLES));
192        out.push('\n');
193        return end;
194    }
195
196    if is_task_list_line(line) {
197        let end = find_task_list_end(lines, i);
198        let block = lines[i..end].join("\n");
199        out.push_str(&render_with_options(&block, Options::ENABLE_TASKLISTS));
200        out.push('\n');
201        return end;
202    }
203
204    out.push_str(&apply_strikethrough(line));
205    out.push('\n');
206    i + 1
207}
208
209/// Returns `true` if `body` contains any GFM-specific syntax that this
210/// plugin would transform.
211fn needs_expansion(body: &str) -> bool {
212    if body.contains("~~") {
213        return true;
214    }
215    if body.lines().any(is_task_list_line) {
216        return true;
217    }
218    has_table(body)
219}
220
221/// Detects whether `body` contains any GFM table block.
222fn has_table(body: &str) -> bool {
223    let lines: Vec<&str> = body.lines().collect();
224    lines.windows(2).any(|w| is_table_header(w[0], w[1]))
225}
226
/// Identifies a fenced-code-block delimiter line.
///
/// Leading whitespace is ignored. Returns the marker (` ``` ` or `~~~`)
/// the line starts with, or `None` when the line is not a fence line.
fn detect_fence(line: &str) -> Option<&'static str> {
    const MARKERS: [&str; 2] = ["```", "~~~"];

    let candidate = line.trim_start();
    MARKERS
        .iter()
        .copied()
        .find(|marker| candidate.starts_with(*marker))
}
239
240/// Returns `true` if `header` looks like a table header followed by a
241/// `|---|---|` separator row on `separator`.
242fn is_table_header(header: &str, separator: &str) -> bool {
243    if !header.contains('|') {
244        return false;
245    }
246    is_separator_row(separator)
247}
248
/// Checks whether `line` is a GFM table separator row such as
/// `| --- | :---: | ---: |`.
///
/// After trimming, the line may only consist of `|`, `-`, `:`, spaces
/// and tabs, and must contain at least one `-` and at least one `|`.
fn is_separator_row(line: &str) -> bool {
    let mut saw_dash = false;
    let mut saw_pipe = false;

    for ch in line.trim().chars() {
        match ch {
            '-' => saw_dash = true,
            '|' => saw_pipe = true,
            ':' | ' ' | '\t' => {}
            _ => return false,
        }
    }

    saw_dash && saw_pipe
}
258
/// Finds where the table that starts at `start` stops.
///
/// The header and separator rows (`start` and `start + 1`) are taken as
/// given; after them, the table continues for as long as lines are
/// non-blank and contain a `|`. Returns the index *just past* the last
/// table line.
fn find_table_end(lines: &[&str], start: usize) -> usize {
    let first_body_row = start + 2; // skip header + separator
    let body_rows = lines
        .iter()
        .skip(first_body_row)
        .take_while(|row| !row.trim().is_empty() && row.contains('|'))
        .count();
    first_body_row + body_rows
}
271
/// Tests whether `line` is a task list item.
///
/// Ignoring leading whitespace, the line must begin with a bullet (`-`,
/// `*` or `+`), a space, a checkbox (`[ ]`, `[x]` or `[X]`) and another
/// space — six bytes in total; anything may follow.
fn is_task_list_line(line: &str) -> bool {
    matches!(
        line.trim_start().as_bytes(),
        [
            b'-' | b'*' | b'+',
            b' ',
            b'[',
            b' ' | b'x' | b'X',
            b']',
            b' ',
            ..
        ]
    )
}
297
298/// Returns the index just past the last contiguous task list line.
299fn find_task_list_end(lines: &[&str], start: usize) -> usize {
300    let mut end = start;
301    while end < lines.len() && is_task_list_line(lines[end]) {
302        end += 1;
303    }
304    end
305}
306
307/// Renders `markdown` to HTML using `pulldown-cmark` with `extra`
308/// options merged in alongside the always-on strikethrough flag.
309fn render_with_options(markdown: &str, extra: Options) -> String {
310    let mut opts = Options::ENABLE_STRIKETHROUGH;
311    opts.insert(extra);
312    let parser = Parser::new_ext(markdown, opts);
313    let mut html = String::with_capacity(markdown.len() + 64);
314    cmark_html::push_html(&mut html, parser);
315    html.trim_end().to_string()
316}
317
/// Replaces `~~text~~` with `<del>text</del>` outside of inline code spans.
///
/// Every backtick toggles "inside code span" state; `~~` pairs are only
/// rewritten while outside a span. An opening `~~` without a matching
/// close (see [`find_strike_close`]) is left as-is.
///
/// All other text is copied through as string slices, so multi-byte
/// UTF-8 characters are preserved exactly. (Pushing individual bytes as
/// `char`s would reinterpret each byte as a Latin-1 code point and
/// corrupt any non-ASCII text.)
fn apply_strikethrough(line: &str) -> String {
    let bytes = line.as_bytes();
    let mut out = String::with_capacity(line.len());
    // Start of the literal text not yet copied into `out`. It only ever
    // sits at 0 or right after an ASCII `~`, so slicing at it is always
    // on a char boundary.
    let mut pending = 0usize;
    let mut i = 0usize;
    let mut in_code = false;

    while i < bytes.len() {
        match bytes[i] {
            b'`' => {
                in_code = !in_code;
                i += 1;
            }
            b'~' if !in_code && bytes.get(i + 1) == Some(&b'~') => {
                if let Some(close) = find_strike_close(line, i + 2) {
                    // Flush the literal run, then emit the struck text.
                    out.push_str(&line[pending..i]);
                    out.push_str("<del>");
                    out.push_str(&line[i + 2..close]);
                    out.push_str("</del>");
                    i = close + 2;
                    pending = i;
                } else {
                    // Unmatched opener: keep the tilde literally.
                    i += 1;
                }
            }
            _ => i += 1,
        }
    }

    out.push_str(&line[pending..]);
    out
}

/// Returns the byte offset of the next `~~` at or after `from`, or `None`.
///
/// Inline code spans inside the strike content are skipped, so a `~~`
/// between two backticks never counts as the close. An unterminated
/// backtick swallows the rest of the line, yielding `None`.
fn find_strike_close(line: &str, from: usize) -> Option<usize> {
    let bytes = line.as_bytes();
    let mut j = from;

    while j + 1 < bytes.len() {
        match bytes[j] {
            b'`' => {
                // Jump past the matching closing backtick, if any.
                let span_len =
                    bytes[j + 1..].iter().position(|&b| b == b'`')?;
                j += span_len + 2;
            }
            b'~' if bytes[j + 1] == b'~' => return Some(j),
            _ => j += 1,
        }
    }
    None
}
373
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::plugin::Plugin;
    use tempfile::tempdir;

    // ---- split_frontmatter -------------------------------------------

    #[test]
    fn split_frontmatter_extracts_yaml_block() {
        let parts = split_frontmatter("---\ntitle: Hello\n---\nBody here\n");
        assert_eq!(parts.0, "---\ntitle: Hello\n---\n");
        assert_eq!(parts.1, "Body here\n");
    }

    #[test]
    fn split_frontmatter_returns_empty_when_absent() {
        let text = "Just a body\nwith two lines\n";
        let (frontmatter, rest) = split_frontmatter(text);
        assert_eq!(frontmatter, "");
        assert_eq!(rest, text);
    }

    // ---- needs_expansion ---------------------------------------------

    #[test]
    fn needs_expansion_detects_strikethrough() {
        let body = "hello ~~world~~";
        assert!(needs_expansion(body));
    }

    #[test]
    fn needs_expansion_detects_task_list() {
        let body = "- [ ] todo\n- [x] done\n";
        assert!(needs_expansion(body));
    }

    #[test]
    fn needs_expansion_detects_table() {
        assert!(needs_expansion("| a | b |\n|---|---|\n| 1 | 2 |\n"));
    }

    #[test]
    fn needs_expansion_returns_false_for_plain_markdown() {
        let body = "# Heading\n\nA paragraph.\n";
        assert!(!needs_expansion(body));
    }

    // ---- line classifiers --------------------------------------------

    #[test]
    fn is_separator_row_accepts_aligned_separators() {
        for row in ["|---|---|", "| :--- | :---: | ---: |"] {
            assert!(is_separator_row(row));
        }
        for row in ["| a | b |", "plain text"] {
            assert!(!is_separator_row(row));
        }
    }

    #[test]
    fn is_task_list_line_recognises_open_and_done() {
        let accepted =
            ["- [ ] todo", "- [x] done", "- [X] done", "  * [ ] indented"];
        for line in accepted {
            assert!(is_task_list_line(line));
        }

        let rejected = ["- regular bullet", "[ ] no bullet"];
        for line in rejected {
            assert!(!is_task_list_line(line));
        }
    }

    // ---- apply_strikethrough -----------------------------------------

    #[test]
    fn apply_strikethrough_wraps_simple_pair() {
        let rendered = apply_strikethrough("hello ~~world~~ done");
        assert_eq!(rendered, "hello <del>world</del> done");
    }

    #[test]
    fn apply_strikethrough_skips_inside_code_span() {
        let rendered = apply_strikethrough("`~~not~~` but ~~yes~~");
        assert_eq!(rendered, "`~~not~~` but <del>yes</del>");
    }

    #[test]
    fn apply_strikethrough_leaves_unmatched_tildes() {
        let source = "just ~~ here";
        assert_eq!(apply_strikethrough(source), source);
    }

    // ---- expand_gfm --------------------------------------------------

    #[test]
    fn expand_gfm_renders_table_block() {
        let out =
            expand_gfm("Intro\n\n| a | b |\n|---|---|\n| 1 | 2 |\n\nOutro\n");
        assert!(out.contains("<table>"), "got: {out}");
        for needle in ["<th>a</th>", "<td>1</td>", "Intro", "Outro"] {
            assert!(out.contains(needle));
        }
    }

    #[test]
    fn expand_gfm_renders_task_list_block() {
        let out = expand_gfm("- [ ] one\n- [x] two\n");
        assert!(out.contains("<ul>"), "got: {out}");
        for needle in ["type=\"checkbox\"", "disabled", "checked"] {
            assert!(out.contains(needle));
        }
    }

    #[test]
    fn expand_gfm_renders_strikethrough_inline() {
        let rendered = expand_gfm("Some ~~old~~ new text\n");
        assert_eq!(rendered, "Some <del>old</del> new text\n");
    }

    #[test]
    fn expand_gfm_preserves_fenced_code_contents() {
        let fenced =
            "```\n| a | b |\n|---|---|\n~~not strike~~\n- [ ] not task\n```\n";
        let rendered = expand_gfm(fenced);

        // Everything between the fences must come through literally ...
        for literal in ["| a | b |", "~~not strike~~", "- [ ] not task"] {
            assert!(rendered.contains(literal));
        }
        // ... and none of it may have been turned into HTML.
        for html in ["<table>", "<del>"] {
            assert!(!rendered.contains(html));
        }
    }

    #[test]
    fn expand_gfm_preserves_frontmatter_unchanged() {
        let rendered = expand_gfm("---\ntitle: Test\n---\n~~strike~~ this\n");
        assert!(rendered.starts_with("---\ntitle: Test\n---\n"));
        assert!(rendered.contains("<del>strike</del>"));
    }

    #[test]
    fn expand_gfm_returns_input_unchanged_when_no_features() {
        let plain = "# Heading\n\nA paragraph with no extensions.\n";
        assert_eq!(expand_gfm(plain), plain);
    }

    #[test]
    fn expand_gfm_handles_tildes_in_tilde_fenced_code() {
        // Content inside `~~~` fences is protected just like ``` fences.
        let rendered = expand_gfm("~~~\n~~text~~\n~~~\n");
        assert!(rendered.contains("~~text~~"));
        assert!(!rendered.contains("<del>"));
    }

    // ---- plugin ------------------------------------------------------

    #[test]
    fn plugin_transforms_markdown_files_in_place() {
        let root = tempdir().unwrap();
        let content = root.path().join("content");
        fs::create_dir_all(&content).unwrap();

        let post_path = content.join("post.md");
        let plain_path = content.join("untouched.md");
        fs::write(&post_path, "---\ntitle: Test\n---\n~~old~~ new\n").unwrap();
        fs::write(&plain_path, "# Plain\n\nNothing fancy.\n").unwrap();

        let ctx = PluginContext::new(
            &content,
            root.path(),
            root.path(),
            root.path(),
        );
        MarkdownExtPlugin.before_compile(&ctx).unwrap();

        // The file with GFM syntax was rewritten, frontmatter intact.
        let post = fs::read_to_string(&post_path).unwrap();
        assert!(post.contains("<del>old</del>"));
        assert!(post.starts_with("---\ntitle: Test\n---\n"));

        // The plain file still has its original contents.
        let untouched = fs::read_to_string(&plain_path).unwrap();
        assert_eq!(untouched, "# Plain\n\nNothing fancy.\n");
    }

    #[test]
    fn plugin_returns_ok_when_content_dir_missing() {
        let root = tempdir().unwrap();
        let missing = root.path().join("missing");
        let ctx = PluginContext::new(
            &missing,
            root.path(),
            root.path(),
            root.path(),
        );
        MarkdownExtPlugin.before_compile(&ctx).unwrap();
    }

    #[test]
    fn plugin_name_is_markdown_ext() {
        let plugin = MarkdownExtPlugin;
        assert_eq!(plugin.name(), "markdown-ext");
    }
}