Skip to main content

ssg/
shortcodes.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Shortcode expansion plugin.
5//!
6//! Preprocesses Markdown content before compilation, expanding
7//! `{{< shortcode args >}}` patterns into HTML fragments.
8
9use crate::plugin::{Plugin, PluginContext};
10use crate::MAX_DIR_DEPTH;
11use anyhow::Result;
12use std::{
13    collections::HashMap,
14    fs,
15    path::{Path, PathBuf},
16};
17
/// Plugin that expands shortcodes in Markdown content.
///
/// Runs in `before_compile` to transform content before staticdatagen
/// processes it. Files whose content changes are rewritten in place
/// inside the content directory.
///
/// Built-in shortcodes:
/// - `{{< youtube id="..." >}}` — responsive `YouTube` embed
/// - `{{< gist user="..." id="..." >}}` — GitHub gist embed
/// - `{{< figure src="..." alt="..." caption="..." >}}` — figure with caption
/// - `{{< island component="..." hydrate="..." props="..." >}}` —
///   `<ssg-island>` element (`hydrate` defaults to `visible`, `props`
///   defaults to `{}`)
/// - `{{< warning >}}...{{< /warning >}}` — admonition blocks
/// - `{{< info >}}...{{< /info >}}`
/// - `{{< tip >}}...{{< /tip >}}`
/// - `{{< danger >}}...{{< /danger >}}`
///
/// Any other inline shortcode name is replaced by an
/// `<!-- unknown shortcode: NAME -->` HTML comment.
#[derive(Debug, Clone, Copy)]
pub struct ShortcodePlugin;
33
34impl Plugin for ShortcodePlugin {
35    fn name(&self) -> &'static str {
36        "shortcodes"
37    }
38
39    fn before_compile(&self, ctx: &PluginContext) -> Result<()> {
40        if !ctx.content_dir.exists() {
41            return Ok(());
42        }
43
44        let md_files = collect_md_files(&ctx.content_dir)?;
45        let mut expanded = 0usize;
46
47        for path in &md_files {
48            let content = fs::read_to_string(path)?;
49            let result = expand_shortcodes(&content);
50            if result != content {
51                fs::write(path, &result)?;
52                expanded += 1;
53            }
54        }
55
56        if expanded > 0 {
57            log::info!(
58                "[shortcodes] Expanded shortcodes in {expanded} file(s)"
59            );
60        }
61        Ok(())
62    }
63}
64
65/// Expands all shortcodes in a string.
66#[must_use]
67pub fn expand_shortcodes(input: &str) -> String {
68    let mut result = input.to_string();
69
70    // Block shortcodes: {{< name >}}...{{< /name >}}
71    for name in &["warning", "info", "tip", "danger"] {
72        result = expand_block_shortcode(&result, name);
73    }
74
75    // Inline shortcodes: {{< name key="value" >}}
76    result = expand_inline_shortcodes(&result);
77
78    result
79}
80
81/// Expands block shortcodes like `{{< warning >}}...{{< /warning >}}`.
82fn expand_block_shortcode(input: &str, name: &str) -> String {
83    let open = format!("{{{{< {name} >}}}}");
84    let close = format!("{{{{< /{name} >}}}}");
85    let mut result = input.to_string();
86
87    while let Some(start) = result.find(&open) {
88        let after_open = start + open.len();
89        if let Some(end_offset) = result[after_open..].find(&close) {
90            let end = after_open + end_offset;
91            let inner = result[after_open..end].trim();
92            let html = format!(
93                "<div class=\"admonition admonition-{}\" role=\"note\">\n\
94                 <p class=\"admonition-title\">{}</p>\n\
95                 <div class=\"admonition-content\">\n{}\n</div>\n</div>",
96                name,
97                capitalize(name),
98                inner
99            );
100            result = format!(
101                "{}{}{}",
102                &result[..start],
103                html,
104                &result[end + close.len()..]
105            );
106        } else {
107            break;
108        }
109    }
110
111    result
112}
113
114/// Expands inline shortcodes like `{{< youtube id="..." >}}`.
115///
116/// Safe for non-ASCII input: byte-level slicing is guarded by
117/// `is_char_boundary` and fallthrough characters are iterated via
118/// `char_indices()` so multi-byte codepoints (emoji, `©`, etc.) are
119/// preserved verbatim rather than truncated mid-byte.
120fn expand_inline_shortcodes(input: &str) -> String {
121    let mut result = String::with_capacity(input.len());
122    let mut pos = 0;
123
124    while pos < input.len() {
125        // The opening marker "{{<" is pure ASCII so byte-level
126        // comparison is safe *as long as pos lands on a char
127        // boundary*. Guard with is_char_boundary to be explicit.
128        if input.is_char_boundary(pos)
129            && pos + 3 <= input.len()
130            && input.as_bytes()[pos] == b'{'
131            && input.as_bytes()[pos + 1] == b'{'
132            && input.as_bytes()[pos + 2] == b'<'
133        {
134            if let Some(end) = input[pos..].find(">}}") {
135                let tag = input[pos + 3..pos + end].trim();
136                let html = render_inline_shortcode(tag);
137                result.push_str(&html);
138                pos += end + 3;
139                continue;
140            }
141        }
142        // Fallthrough: push the next full codepoint, not just one
143        // byte — this handles multi-byte UTF-8 characters cleanly.
144        // The `chars().next()` is guaranteed to be `Some` because
145        // the loop guard `pos < input.len()` ensures the suffix is
146        // non-empty, so an `expect` here cannot panic.
147        #[allow(clippy::expect_used)]
148        let c = input[pos..]
149            .chars()
150            .next()
151            .expect("loop guard ensures pos < input.len()");
152        result.push(c);
153        pos += c.len_utf8();
154    }
155
156    result
157}
158
159/// Renders a single inline shortcode tag content.
160fn render_inline_shortcode(tag: &str) -> String {
161    let parts = parse_shortcode_attrs(tag);
162    let name = parts.get("_name").map_or("", String::as_str);
163
164    match name {
165        "youtube" => {
166            let id = parts.get("id").map_or("", String::as_str);
167            if id.is_empty() {
168                return "<!-- youtube: missing id -->".to_string();
169            }
170            format!(
171                "<div class=\"video-container\" style=\"position:relative;padding-bottom:56.25%;height:0;overflow:hidden\">\
172                 <iframe src=\"https://www.youtube-nocookie.com/embed/{id}\" \
173                 style=\"position:absolute;top:0;left:0;width:100%;height:100%\" \
174                 frameborder=\"0\" allowfullscreen loading=\"lazy\" \
175                 title=\"YouTube video\"></iframe></div>"
176            )
177        }
178        "gist" => {
179            let user = parts.get("user").map_or("", String::as_str);
180            let id = parts.get("id").map_or("", String::as_str);
181            if user.is_empty() || id.is_empty() {
182                return "<!-- gist: missing user or id -->".to_string();
183            }
184            format!(
185                "<script src=\"https://gist.github.com/{user}/{id}.js\"></script>"
186            )
187        }
188        "figure" => {
189            let src = parts.get("src").map_or("", String::as_str);
190            let alt = parts.get("alt").map_or("", String::as_str);
191            let caption = parts.get("caption").map_or("", String::as_str);
192            let mut html = format!(
193                "<figure><img src=\"{src}\" alt=\"{alt}\" loading=\"lazy\">"
194            );
195            if !caption.is_empty() {
196                html.push_str(&format!("<figcaption>{caption}</figcaption>"));
197            }
198            html.push_str("</figure>");
199            html
200        }
201        "island" => {
202            let component = parts.get("component").map_or("", String::as_str);
203            let hydrate =
204                parts.get("hydrate").map_or("visible", String::as_str);
205            let props = parts.get("props").map_or("{}", String::as_str);
206            if component.is_empty() {
207                return "<!-- island: missing component -->".to_string();
208            }
209            format!(
210                "<ssg-island component=\"{component}\" hydrate=\"{hydrate}\" props='{props}'>\
211                 <template shadowrootmode=\"open\"><slot></slot></template>\
212                 </ssg-island>"
213            )
214        }
215        _ => format!("<!-- unknown shortcode: {name} -->"),
216    }
217}
218
/// Parses shortcode attributes: `name key="value" key2="value2"`
///
/// The returned map always contains `_name`, holding the first
/// whitespace-delimited token of `tag` (empty when `tag` is blank).
/// Every following `key="value"` pair is stored under `key`.
///
/// Input that does not follow that shape is handled leniently:
///
/// - An unquoted value (`width=100`) is skipped up to the next
///   whitespace and not stored.
/// - A bare token without `=` (`autoplay`) is ignored.
/// - A quoted value with no closing quote runs to the end of the tag.
///
/// Skipped tokens never leak into the key of a following pair: only
/// the last whitespace-separated token before `=` is used as the key.
fn parse_shortcode_attrs(tag: &str) -> HashMap<String, String> {
    let mut attrs = HashMap::new();
    let trimmed = tag.trim();

    // First token is the shortcode name.
    let name_end = trimmed
        .find(char::is_whitespace)
        .unwrap_or(trimmed.len());
    let (name, mut rest) = trimmed.split_at(name_end);
    let _ = attrs.insert("_name".to_string(), name.to_string());

    // Each iteration consumes one `key=...` item, so `rest` strictly
    // shrinks and the loop terminates.
    while let Some(eq_at) = rest.find('=') {
        // Bare flags in front of the key (`autoplay id="x"`) must not
        // become part of it, so keep only the final token.
        let key = rest[..eq_at].split_whitespace().next_back().unwrap_or("");
        let after_eq = &rest[eq_at + 1..];

        if let Some(quoted) = after_eq.strip_prefix('"') {
            // Quoted value: runs to the closing quote, or to the end of
            // the tag when the quote is never closed.
            let (value, remainder) =
                quoted.split_once('"').unwrap_or((quoted, ""));
            let _ = attrs.insert(key.to_string(), value.to_string());
            rest = remainder;
        } else {
            // Unquoted value: dropped, but it must still be consumed so
            // it is not glued onto the next key.
            let value_len = after_eq
                .find(char::is_whitespace)
                .unwrap_or(after_eq.len());
            rest = &after_eq[value_len..];
        }
    }

    attrs
}
275
/// Returns `s` with its first character upper-cased.
///
/// The remainder of the string is copied unchanged. An empty input
/// yields an empty string. Upper-casing follows Unicode rules, so a
/// single character may expand to several (`ß` becomes `SS`).
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    rest.next().map_or_else(String::new, |first| {
        let mut out: String = first.to_uppercase().collect();
        out.push_str(rest.as_str());
        out
    })
}
283
/// Collects the Markdown files found under `dir`.
///
/// Thin wrapper that delegates to
/// `crate::walk::walk_files_bounded_depth`, passing `"md"` as the
/// extension and `MAX_DIR_DEPTH` as the depth bound. The exact
/// matching and depth semantics, and the errors returned, are those of
/// that helper.
fn collect_md_files(dir: &Path) -> Result<Vec<PathBuf>> {
    crate::walk::walk_files_bounded_depth(dir, "md", MAX_DIR_DEPTH)
}
287
/// Unit tests for shortcode expansion, attribute parsing and the
/// plugin's `before_compile` hook.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    #[test]
    fn test_youtube_shortcode() {
        let input = r#"Check this: {{< youtube id="abc123" >}}"#;
        let result = expand_shortcodes(input);
        assert!(result.contains("youtube-nocookie.com/embed/abc123"));
        assert!(result.contains("video-container"));
    }

    #[test]
    fn test_gist_shortcode() {
        let input = r#"{{< gist user="octocat" id="12345" >}}"#;
        let result = expand_shortcodes(input);
        assert!(result.contains("gist.github.com/octocat/12345.js"));
    }

    #[test]
    fn test_figure_shortcode() {
        let input = r#"{{< figure src="/img/photo.jpg" alt="A photo" caption="My photo" >}}"#;
        let result = expand_shortcodes(input);
        assert!(result.contains("<figure>"));
        assert!(result.contains("alt=\"A photo\""));
        assert!(result.contains("<figcaption>My photo</figcaption>"));
    }

    #[test]
    fn test_warning_block() {
        let input = "{{< warning >}}\nBe careful!\n{{< /warning >}}";
        let result = expand_shortcodes(input);
        assert!(result.contains("admonition-warning"));
        assert!(result.contains("Warning"));
        assert!(result.contains("Be careful!"));
    }

    #[test]
    fn test_info_block() {
        let input = "{{< info >}}\nNote this.\n{{< /info >}}";
        let result = expand_shortcodes(input);
        assert!(result.contains("admonition-info"));
        assert!(result.contains("Info"));
    }

    #[test]
    fn test_unknown_shortcode() {
        let input = r#"{{< unknown key="val" >}}"#;
        let result = expand_shortcodes(input);
        assert!(result.contains("<!-- unknown shortcode: unknown -->"));
    }

    #[test]
    fn test_no_shortcodes() {
        let input = "Regular markdown with no shortcodes.";
        let result = expand_shortcodes(input);
        assert_eq!(result, input);
    }

    #[test]
    fn test_parse_attrs() {
        let attrs = parse_shortcode_attrs(r#"youtube id="abc" "#);
        assert_eq!(attrs.get("_name").unwrap(), "youtube");
        assert_eq!(attrs.get("id").unwrap(), "abc");
    }

    // -------------------------------------------------------------------
    // Plugin surface + missing-param branches
    // -------------------------------------------------------------------

    #[test]
    fn name_returns_static_shortcodes_identifier() {
        assert_eq!(ShortcodePlugin.name(), "shortcodes");
    }

    #[test]
    fn before_compile_missing_content_dir_returns_ok() {
        // Covers the early return taken when `ctx.content_dir` does
        // not exist.
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("missing");
        let ctx =
            PluginContext::new(&missing, dir.path(), dir.path(), dir.path());
        ShortcodePlugin.before_compile(&ctx).unwrap();
    }

    #[test]
    fn before_compile_no_markdown_files_is_noop() {
        // An existing but empty content directory must succeed.
        let dir = tempfile::tempdir().unwrap();
        let content = dir.path().join("content");
        fs::create_dir_all(&content).unwrap();
        let ctx =
            PluginContext::new(&content, dir.path(), dir.path(), dir.path());
        ShortcodePlugin.before_compile(&ctx).unwrap();
    }

    #[test]
    fn before_compile_unchanged_file_is_not_rewritten() {
        // Covers the no-change path: the file contains no shortcodes,
        // so the expanded text equals the original and the `fs::write`
        // is skipped.
        let dir = tempfile::tempdir().unwrap();
        let content = dir.path().join("content");
        fs::create_dir_all(&content).unwrap();
        let body = "plain markdown no shortcodes";
        fs::write(content.join("p.md"), body).unwrap();
        let ctx =
            PluginContext::new(&content, dir.path(), dir.path(), dir.path());
        ShortcodePlugin.before_compile(&ctx).unwrap();
        assert_eq!(fs::read_to_string(content.join("p.md")).unwrap(), body);
    }

    // -------------------------------------------------------------------
    // render_inline_shortcode — missing required-param branches
    // -------------------------------------------------------------------

    #[test]
    fn render_inline_shortcode_youtube_missing_id_emits_comment() {
        // Covers the empty-`id` branch of the `youtube` arm.
        let result = expand_shortcodes(r"{{< youtube >}}");
        assert!(result.contains("<!-- youtube: missing id -->"));
    }

    #[test]
    fn render_inline_shortcode_gist_missing_user_emits_comment() {
        // Covers the `user.is_empty() || id.is_empty()` branch of the
        // `gist` arm, with `user` missing.
        let result = expand_shortcodes(r#"{{< gist id="123" >}}"#);
        assert!(result.contains("<!-- gist: missing user or id -->"));
    }

    #[test]
    fn render_inline_shortcode_gist_missing_id_emits_comment() {
        // Same branch as above, with `id` missing instead.
        let result = expand_shortcodes(r#"{{< gist user="octocat" >}}"#);
        assert!(result.contains("<!-- gist: missing user or id -->"));
    }

    #[test]
    fn render_inline_shortcode_figure_without_caption_omits_figcaption() {
        // Covers the `figure` arm without a caption argument: the
        // `<figcaption>` element is NOT appended.
        let result =
            expand_shortcodes(r#"{{< figure src="/a.jpg" alt="A" >}}"#);
        assert!(result.contains("<figure>"));
        assert!(result.contains(r#"alt="A""#));
        assert!(!result.contains("<figcaption>"));
    }

    #[test]
    fn render_inline_shortcode_figure_with_caption_includes_figcaption() {
        let result = expand_shortcodes(
            r#"{{< figure src="/a.jpg" alt="A" caption="Hi" >}}"#,
        );
        assert!(result.contains("<figcaption>Hi</figcaption>"));
    }

    // -------------------------------------------------------------------
    // expand_block_shortcode — unterminated block break
    // -------------------------------------------------------------------

    #[test]
    fn expand_block_shortcode_unterminated_breaks_out_cleanly() {
        // Covers the `break` taken when the closing tag is missing.
        // `expand_block_shortcode` must return without looping
        // forever. (In the full `expand_shortcodes` pipeline the
        // inline pass would afterwards treat the leftover
        // `{{< warning >}}` as an unknown inline tag; that is not
        // exercised here.) What matters for coverage is that the
        // function terminates.
        let input = "{{< warning >}}\nno closing tag\n";
        let result = expand_block_shortcode(input, "warning");
        // Since there's no close tag, the block expander leaves
        // the input untouched.
        assert_eq!(result, input);
    }

    // -------------------------------------------------------------------
    // capitalize
    // -------------------------------------------------------------------

    #[test]
    fn capitalize_empty_string_returns_empty() {
        assert_eq!(capitalize(""), "");
    }

    #[test]
    fn capitalize_single_word_uppercases_first_letter() {
        assert_eq!(capitalize("warning"), "Warning");
        assert_eq!(capitalize("info"), "Info");
    }

    // -------------------------------------------------------------------
    // collect_md_files — depth guard + filter
    // -------------------------------------------------------------------

    #[test]
    fn shortcodes_collect_md_files_respects_max_dir_depth() {
        // Builds a directory chain deeper than `MAX_DIR_DEPTH`, with
        // one Markdown file per level, and checks that not all of
        // them are returned.
        let dir = tempfile::tempdir().unwrap();
        let mut current = dir.path().to_path_buf();
        for i in 0..MAX_DIR_DEPTH + 2 {
            current = current.join(format!("d{i}"));
            fs::create_dir_all(&current).unwrap();
            fs::write(current.join("p.md"), "").unwrap();
        }
        let files = collect_md_files(dir.path()).unwrap();
        assert!(files.len() <= MAX_DIR_DEPTH + 1);
    }

    #[test]
    fn parse_shortcode_attrs_trailing_whitespace_breaks_outer_loop() {
        // Covers the case where nothing but whitespace follows the
        // shortcode name (no key=value): only `_name` is produced.
        let attrs = parse_shortcode_attrs("name   ");
        assert_eq!(attrs.get("_name").unwrap(), "name");
        assert_eq!(attrs.len(), 1);
    }

    #[test]
    fn parse_shortcode_attrs_with_value_then_trailing_whitespace() {
        let attrs = parse_shortcode_attrs("youtube id=\"x\"   ");
        assert_eq!(attrs.get("_name").unwrap(), "youtube");
        assert_eq!(attrs.get("id").unwrap(), "x");
    }

    #[test]
    fn parse_shortcode_attrs_key_without_equals_breaks() {
        // Covers the case where the search for `=` runs off the end
        // of the tag because the last token is a bare flag.
        let attrs = parse_shortcode_attrs("youtube id=\"x\" trailingflag");
        assert_eq!(attrs.get("_name").unwrap(), "youtube");
        // The trailing token without `=` is silently dropped.
        assert!(!attrs.contains_key("trailingflag"));
    }

    #[test]
    fn parse_shortcode_attrs_unquoted_value_is_dropped() {
        // Covers the case where the character after `=` is not `"`:
        // unquoted values are not stored.
        let attrs = parse_shortcode_attrs("name id=unquoted");
        assert_eq!(attrs.get("_name").unwrap(), "name");
        assert!(!attrs.contains_key("id"));
    }

    #[test]
    fn expand_inline_shortcodes_unterminated_tag_falls_through_to_pushchar() {
        // Covers the case where `{{<` is found but no `>}}` follows:
        // no tag is rendered and the surrounding text is still
        // present in the output.
        let result = expand_shortcodes("text {{< unterminated");
        assert!(result.contains("text"));
        assert!(result.contains("unterminated"));
    }

    #[test]
    fn shortcodes_collect_md_files_filters_non_md_extensions() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join("a.md"), "").unwrap();
        fs::write(dir.path().join("b.txt"), "").unwrap();
        let files = collect_md_files(dir.path()).unwrap();
        assert_eq!(files.len(), 1);
    }

    #[test]
    fn test_plugin_expands_files() {
        // End-to-end: a Markdown file containing a shortcode is
        // rewritten on disk by `before_compile`.
        let dir = tempfile::tempdir().unwrap();
        let content = dir.path().join("content");
        fs::create_dir_all(&content).unwrap();
        fs::write(
            content.join("test.md"),
            r#"---
title: Test
---
{{< youtube id="xyz" >}}
"#,
        )
        .unwrap();

        let ctx =
            PluginContext::new(&content, dir.path(), dir.path(), dir.path());
        ShortcodePlugin.before_compile(&ctx).unwrap();

        let result = fs::read_to_string(content.join("test.md")).unwrap();
        assert!(result.contains("youtube-nocookie.com"));
    }
}
571
/// Property-based tests for `expand_shortcodes`.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(1000))]

        /// `expand_shortcodes` must never panic on arbitrary input.
        #[test]
        fn expand_never_panics(input in "\\PC*") {
            let _ = expand_shortcodes(&input);
        }

        /// Strings containing no `{` at all (and therefore no `{{<`
        /// marker) must pass through unchanged.
        #[test]
        fn no_shortcode_identity(input in "[^{]*") {
            let output = expand_shortcodes(&input);
            prop_assert_eq!(&output, &input);
        }
    }
}