Skip to main content

ssg/seo/
seo_plugin.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! SEO meta tag injection plugin.
5
6use super::helpers::{
7    escape_attr, extract_canonical, extract_description, extract_existing_meta,
8    extract_first_content_image, extract_html_lang, extract_title,
9    has_meta_tag,
10};
11use crate::plugin::{Plugin, PluginContext};
12use anyhow::Result;
13use std::path::Path;
14
/// Injects missing SEO meta tags into HTML documents.
///
/// The work happens in `transform_html`: each HTML document handed to
/// the plugin is inspected and any missing tags are inserted immediately
/// before its closing `</head>` tag. A document without `</head>` is
/// returned unchanged, and `after_compile` performs no work.
///
/// Tags that may be added:
///
/// - `<meta name="description">`
/// - Open Graph: `og:title`, `og:description`, `og:type`, `og:url`,
///   `og:image` (with `og:image:width` / `og:image:height`), `og:locale`
/// - Twitter Card: `twitter:card`, `twitter:title`,
///   `twitter:description`, `twitter:image`
///
/// The plugin is idempotent — it checks for existing tags before
/// injecting and will not duplicate them.
///
/// # Example
///
/// ```rust
/// use ssg::plugin::PluginManager;
/// use ssg::seo::SeoPlugin;
///
/// let mut pm = PluginManager::new();
/// pm.register(SeoPlugin);
/// ```
#[derive(Debug, Clone, Copy)]
pub struct SeoPlugin;
35
36impl Plugin for SeoPlugin {
37    fn name(&self) -> &'static str {
38        "seo"
39    }
40
41    fn has_transform(&self) -> bool {
42        true
43    }
44
45    fn transform_html(
46        &self,
47        html: &str,
48        _path: &Path,
49        _ctx: &PluginContext,
50    ) -> Result<String> {
51        inject_seo_tags_html(html)
52    }
53
54    fn after_compile(&self, _ctx: &PluginContext) -> Result<()> {
55        Ok(())
56    }
57}
58
59/// Builds Open Graph meta tags that are missing from the HTML.
60fn build_og_tags(
61    html: &str,
62    title: &str,
63    description: &str,
64    canonical: &str,
65    og_type: &str,
66) -> Vec<String> {
67    let mut tags = Vec::new();
68
69    if !has_meta_tag(html, "og:title") && !title.is_empty() {
70        tags.push(format!(
71            "<meta property=\"og:title\" content=\"{}\">",
72            escape_attr(title)
73        ));
74    }
75
76    if !has_meta_tag(html, "og:description") && !description.is_empty() {
77        tags.push(format!(
78            "<meta property=\"og:description\" content=\"{}\">",
79            escape_attr(description)
80        ));
81    }
82
83    if !has_meta_tag(html, "og:type") {
84        tags.push(format!("<meta property=\"og:type\" content=\"{og_type}\">"));
85    }
86
87    if !has_meta_tag(html, "og:url") && !canonical.is_empty() {
88        tags.push(format!(
89            "<meta property=\"og:url\" content=\"{}\">",
90            escape_attr(canonical)
91        ));
92    }
93
94    // OG image: extract from existing meta or first <img> in content
95    if !has_meta_tag(html, "og:image") {
96        let image = extract_existing_meta(html, "twitter:image");
97        let image = if image.is_empty() {
98            extract_first_content_image(html)
99        } else {
100            image
101        };
102        if !image.is_empty() {
103            tags.push(format!(
104                "<meta property=\"og:image\" content=\"{}\">",
105                escape_attr(&image)
106            ));
107            // Social platforms render cards faster with explicit dimensions
108            if !has_meta_tag(html, "og:image:width") {
109                tags.push(
110                    "<meta property=\"og:image:width\" content=\"1200\">"
111                        .to_string(),
112                );
113                tags.push(
114                    "<meta property=\"og:image:height\" content=\"630\">"
115                        .to_string(),
116                );
117            }
118        }
119    }
120
121    // OG locale
122    if !has_meta_tag(html, "og:locale") {
123        let lang = extract_html_lang(html);
124        if !lang.is_empty() {
125            let locale = lang.replace('-', "_");
126            tags.push(format!(
127                "<meta property=\"og:locale\" content=\"{}\">",
128                escape_attr(&locale)
129            ));
130        }
131    }
132
133    tags
134}
135
136/// Builds Twitter Card meta tags that are missing from the HTML.
137fn build_twitter_tags(
138    html: &str,
139    title: &str,
140    description: &str,
141    twitter_card: &str,
142) -> Vec<String> {
143    let mut tags = Vec::new();
144
145    if !has_meta_tag(html, "twitter:card") {
146        tags.push(format!(
147            "<meta name=\"twitter:card\" content=\"{twitter_card}\">"
148        ));
149    }
150
151    if !has_meta_tag(html, "twitter:title") && !title.is_empty() {
152        tags.push(format!(
153            "<meta name=\"twitter:title\" content=\"{}\">",
154            escape_attr(title)
155        ));
156    }
157
158    if !has_meta_tag(html, "twitter:description") && !description.is_empty() {
159        tags.push(format!(
160            "<meta name=\"twitter:description\" content=\"{}\">",
161            escape_attr(description)
162        ));
163    }
164
165    if !has_meta_tag(html, "twitter:image") {
166        let image = extract_existing_meta(html, "og:image");
167        let image = if image.is_empty() {
168            extract_first_content_image(html)
169        } else {
170            image
171        };
172        if !image.is_empty() {
173            tags.push(format!(
174                "<meta name=\"twitter:image\" content=\"{}\">",
175                escape_attr(&image)
176            ));
177        }
178    }
179
180    tags
181}
182
183/// Builds the meta description tag if missing from the HTML.
184fn build_meta_description(html: &str, description: &str) -> Option<String> {
185    if !has_meta_tag(html, "description") && !description.is_empty() {
186        Some(format!(
187            "<meta name=\"description\" content=\"{}\">",
188            escape_attr(description)
189        ))
190    } else {
191        None
192    }
193}
194
195/// Inject missing SEO meta tags into an HTML string, returning the modified HTML.
196fn inject_seo_tags_html(html: &str) -> Result<String> {
197    let title = extract_title(html);
198    let description = extract_description(html, 160);
199    let canonical = extract_canonical(html);
200
201    let is_article = html.contains("<article");
202    let og_type = if is_article { "article" } else { "website" };
203    let twitter_card = if is_article {
204        "summary_large_image"
205    } else {
206        "summary"
207    };
208
209    let mut tags = Vec::new();
210
211    if let Some(meta_desc) = build_meta_description(html, &description) {
212        tags.push(meta_desc);
213    }
214    tags.extend(build_og_tags(
215        html,
216        &title,
217        &description,
218        &canonical,
219        og_type,
220    ));
221    tags.extend(build_twitter_tags(html, &title, &description, twitter_card));
222
223    if tags.is_empty() {
224        return Ok(html.to_string());
225    }
226
227    let injection = tags.join("\n");
228    let result = if let Some(pos) = html.find("</head>") {
229        format!("{}{}\n{}", &html[..pos], injection, &html[pos..])
230    } else {
231        html.to_string()
232    };
233
234    Ok(result)
235}
236
// Unit tests for the SEO plugin: the individual tag builders are exercised
// directly, and the full injection path is exercised through
// `SeoPlugin::transform_html`.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use std::path::Path;
    use tempfile::tempdir;

    // Builds a `PluginContext` for tests. `site` is passed as the third
    // argument (presumably the site output directory — confirm against
    // `PluginContext::new`); the other paths are fixed placeholders.
    fn ctx(site: &Path) -> PluginContext {
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site,
            Path::new("templates"),
        )
    }

    // The plugin name is part of its public identity and must stay "seo".
    #[test]
    fn name_is_stable() {
        assert_eq!(SeoPlugin.name(), "seo");
    }

    // `after_compile` must succeed even when the site path does not exist.
    #[test]
    fn no_op_when_site_dir_missing() {
        let dir = tempdir().unwrap();
        SeoPlugin
            .after_compile(&ctx(&dir.path().join("nope")))
            .unwrap();
    }

    // ── build_meta_description ──────────────────────────────────

    // No description tag in the page + non-empty text → tag is produced.
    #[test]
    fn meta_description_built_when_missing_and_text_provided() {
        let html = r#"<html><head><title>X</title></head><body></body></html>"#;
        let out = build_meta_description(html, "A cool page");
        assert_eq!(
            out.as_deref(),
            Some(r#"<meta name="description" content="A cool page">"#)
        );
    }

    // Empty description text never yields a tag.
    #[test]
    fn meta_description_skipped_when_empty_text() {
        let html = "<html><head></head></html>";
        assert!(build_meta_description(html, "").is_none());
    }

    // An existing description tag is left alone, not overridden.
    #[test]
    fn meta_description_skipped_when_already_present() {
        let html = r#"<html><head><meta name="description" content="X"></head></html>"#;
        assert!(build_meta_description(html, "Override?").is_none());
    }

    // NOTE(review): this only proves the raw value was not copied verbatim;
    // the exact escaped form depends on `escape_attr` and is not asserted.
    #[test]
    fn meta_description_escapes_attribute_value() {
        let html = "<html><head></head></html>";
        let out = build_meta_description(html, r#"X & "Y" <Z>"#).unwrap();
        // No raw `&`, raw `"` between content="...", or raw `<` in attribute.
        assert!(out.contains("content="));
        assert!(!out.contains(r#"content="X & ""#));
    }

    // ── build_og_tags ───────────────────────────────────────────

    // A page with no OG tags gets title, description, type, url and locale.
    #[test]
    fn og_tags_includes_title_description_type_url() {
        let html = "<html lang=\"en\"><head></head></html>";
        let tags = build_og_tags(
            html,
            "Hello",
            "World",
            "https://example.com/page",
            "website",
        );
        let joined = tags.join("\n");
        assert!(joined.contains(r#"property="og:title" content="Hello""#));
        assert!(joined.contains(r#"property="og:description" content="World""#));
        assert!(joined.contains(r#"property="og:type" content="website""#));
        assert!(joined.contains(
            r#"property="og:url" content="https://example.com/page""#
        ));
        assert!(joined.contains(r#"property="og:locale" content="en""#));
    }

    // Tags already present in the page must not be emitted a second time.
    #[test]
    fn og_tags_skips_existing_tags() {
        let html = r#"<html lang="en"><head>
            <meta property="og:title" content="Existing">
            <meta property="og:type" content="article">
        </head></html>"#;
        let tags = build_og_tags(
            html,
            "Hello",
            "World",
            "https://example.com",
            "website",
        );
        let joined = tags.join("\n");
        assert!(
            !joined.contains(r#"property="og:title""#),
            "should not duplicate og:title: {joined}"
        );
        assert!(
            !joined.contains(r#"property="og:type""#),
            "should not duplicate og:type"
        );
    }

    // With no og:image, the twitter:image value is reused and the default
    // 1200x630 dimensions are added.
    #[test]
    fn og_tags_falls_back_from_twitter_image_when_og_image_missing() {
        let html = r#"<html><head>
            <meta name="twitter:image" content="/twit.png">
        </head></html>"#;
        let tags = build_og_tags(html, "T", "D", "", "website");
        let joined = tags.join("\n");
        assert!(
            joined.contains(r#"property="og:image" content="/twit.png""#),
            "should reuse twitter:image when og:image absent: {joined}"
        );
        // and emit explicit dimensions for fast social card render
        assert!(joined.contains(r#"property="og:image:width" content="1200""#));
        assert!(joined.contains(r#"property="og:image:height" content="630""#));
    }

    // `lang="en-GB"` must become `og:locale` `en_GB`.
    #[test]
    fn og_tags_locale_translates_html_lang_dashes_to_underscores() {
        let html = "<html lang=\"en-GB\"><head></head></html>";
        let tags = build_og_tags(html, "T", "D", "", "website");
        let joined = tags.join("\n");
        assert!(
            joined.contains(r#"property="og:locale" content="en_GB""#),
            "lang=\"en-GB\" should produce og:locale=\"en_GB\", got: {joined}"
        );
    }

    // Without an `<html lang>` attribute no og:locale tag is invented.
    #[test]
    fn og_tags_omits_locale_when_html_has_no_lang() {
        let html = "<html><head></head></html>";
        let tags = build_og_tags(html, "T", "D", "", "website");
        let joined = tags.join("\n");
        assert!(
            !joined.contains("og:locale"),
            "no html lang → no og:locale, got: {joined}"
        );
    }

    // ── build_twitter_tags ──────────────────────────────────────

    // A page with no Twitter tags gets card, title and description.
    #[test]
    fn twitter_tags_includes_card_title_description() {
        let html = "<html><head></head></html>";
        let tags = build_twitter_tags(html, "T", "D", "summary");
        let joined = tags.join("\n");
        assert!(joined.contains(r#"name="twitter:card" content="summary""#));
        assert!(joined.contains(r#"name="twitter:title" content="T""#));
        assert!(joined.contains(r#"name="twitter:description" content="D""#));
    }

    // With no twitter:image, the og:image value is reused.
    #[test]
    fn twitter_tags_falls_back_to_og_image_when_twitter_image_missing() {
        let html = r#"<html><head>
            <meta property="og:image" content="/og.png">
        </head></html>"#;
        let tags = build_twitter_tags(html, "T", "D", "summary");
        let joined = tags.join("\n");
        assert!(
            joined.contains(r#"name="twitter:image" content="/og.png""#),
            "should reuse og:image when twitter:image absent: {joined}"
        );
    }

    // ── inject_seo_tags_html integration via transform_html ─────

    // End to end: a bare page gains description, OG and Twitter tags.
    #[test]
    fn transform_html_injects_tags() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());

        let html = r#"<!doctype html><html lang="en"><head><title>Hello</title></head>
            <body><p>World is wide.</p></body></html>"#;

        let after = SeoPlugin
            .transform_html(html, Path::new("page.html"), &c)
            .unwrap();
        assert!(after.contains("og:title"));
        assert!(after.contains("twitter:card"));
        assert!(after.contains("name=\"description\""));
    }

    // An `<article>` element switches og:type and the Twitter card style.
    #[test]
    fn transform_html_uses_article_type_when_article_tag_present() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());

        let html = r#"<!doctype html><html lang="en"><head><title>P</title></head>
            <body><article><p>Content.</p></article></body></html>"#;

        let after = SeoPlugin
            .transform_html(html, Path::new("post.html"), &c)
            .unwrap();
        assert!(
            after.contains(r#"og:type" content="article""#),
            "presence of <article> should set og:type=article: {after}"
        );
        assert!(
            after.contains(r#"twitter:card" content="summary_large_image""#),
            "article should use summary_large_image twitter card: {after}"
        );
    }

    // Feeding the output back in must produce exactly the same document.
    #[test]
    fn transform_html_is_idempotent() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());

        let html = r#"<html lang="en"><head><title>Y</title></head><body>Z</body></html>"#;

        let first = SeoPlugin
            .transform_html(html, Path::new("x.html"), &c)
            .unwrap();
        let second = SeoPlugin
            .transform_html(&first, Path::new("x.html"), &c)
            .unwrap();
        assert_eq!(first, second, "second run must not duplicate meta tags");
    }

    // `after_compile` must succeed on an existing but empty site directory.
    #[test]
    fn after_compile_no_op_when_no_html_files() {
        let dir = tempdir().unwrap();
        // Site dir exists but is empty.
        SeoPlugin.after_compile(&ctx(dir.path())).unwrap();
    }

    // A document with no `</head>` has no insertion point and is returned
    // byte-for-byte unchanged.
    #[test]
    fn transform_html_handles_html_without_head_tag() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let raw = "<!doctype html><html><body>only</body></html>";
        let after = SeoPlugin
            .transform_html(raw, Path::new("frag.html"), &c)
            .unwrap();
        assert_eq!(after, raw);
    }
}