Skip to main content

ssg/seo/
robots.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! robots.txt generation plugin.
5
6use crate::plugin::{Plugin, PluginContext};
7use anyhow::{Context, Result};
8use std::fs;
9
/// Plugin that emits a default `robots.txt` into the site directory.
///
/// The generated file permits every user agent to crawl the whole site
/// and points crawlers at `{base_url}/sitemap.xml`. A `robots.txt` that
/// is already present in the site directory is left as it is.
///
/// # Examples
///
/// ```rust
/// use ssg::plugin::PluginManager;
/// use ssg::seo::RobotsPlugin;
///
/// let mut pm = PluginManager::new();
/// pm.register(RobotsPlugin::new("https://example.com"));
/// ```
#[derive(Clone, Debug)]
pub struct RobotsPlugin {
    /// Site base URL used as the prefix of the `Sitemap:` entry.
    base_url: String,
}

impl RobotsPlugin {
    /// Builds a `RobotsPlugin` for the site rooted at `base_url`.
    ///
    /// Anything convertible into a `String` is accepted (for example a
    /// `&str` or an owned `String`). The value is stored exactly as
    /// given; no validation or normalisation happens here.
    pub fn new(base_url: impl Into<String>) -> Self {
        let base_url = base_url.into();
        Self { base_url }
    }
}
38
39impl Plugin for RobotsPlugin {
40    fn name(&self) -> &'static str {
41        "robots"
42    }
43
44    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
45        if !ctx.site_dir.exists() {
46            return Ok(());
47        }
48
49        let robots_path = ctx.site_dir.join("robots.txt");
50        if robots_path.exists() {
51            return Ok(());
52        }
53
54        let content = format!(
55            "User-agent: *\nAllow: /\nSitemap: {}/sitemap.xml\n",
56            self.base_url.trim_end_matches('/')
57        );
58
59        fs::write(&robots_path, content).with_context(|| {
60            format!("cannot write {}", robots_path.display())
61        })?;
62
63        Ok(())
64    }
65}
66
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {

    use super::*;
    use std::path::Path;
    use tempfile::tempdir;

    /// File name the plugin is expected to produce inside the site dir.
    const ROBOTS_FILE: &str = "robots.txt";

    /// Builds a `PluginContext` with `site` as its third argument; the
    /// other three arguments are fixed relative placeholder paths.
    fn ctx(site: &Path) -> PluginContext {
        let content = Path::new("content");
        let build = Path::new("build");
        let templates = Path::new("templates");
        PluginContext::new(content, build, site, templates)
    }

    /// Runs `after_compile` for a plugin configured with `base_url`
    /// against `site`, panicking if the hook reports an error.
    fn run(base_url: &str, site: &Path) {
        let plugin = RobotsPlugin::new(base_url);
        plugin.after_compile(&ctx(site)).unwrap();
    }

    /// Pre-populates `site` with a `robots.txt` holding `contents`.
    fn seed_robots(site: &Path, contents: &str) {
        fs::write(site.join(ROBOTS_FILE), contents).unwrap();
    }

    /// Reads back the `robots.txt` found in `site`.
    fn read_robots(site: &Path) -> String {
        fs::read_to_string(site.join(ROBOTS_FILE)).unwrap()
    }

    #[test]
    fn name_is_stable() {
        // The name is part of the public contract (log lines and
        // PluginManager APIs key off it), so pin the exact value.
        let plugin = RobotsPlugin::new("https://x.example");
        assert_eq!(plugin.name(), "robots");
    }

    #[test]
    fn new_accepts_string_or_str() {
        // `impl Into<String>` must accept both borrowed and owned input.
        let _from_str = RobotsPlugin::new("https://a.example");
        let _from_string = RobotsPlugin::new(String::from("https://b.example"));
    }

    #[test]
    fn writes_robots_txt_when_missing() {
        let dir = tempdir().unwrap();
        run("https://example.com", dir.path());

        let body = read_robots(dir.path());
        assert_eq!(
            body,
            "User-agent: *\nAllow: /\nSitemap: https://example.com/sitemap.xml\n"
        );
    }

    #[test]
    fn trims_trailing_slash_from_base_url() {
        let dir = tempdir().unwrap();
        run("https://example.com/", dir.path());

        let body = read_robots(dir.path());
        let has_clean_sitemap = body.contains("Sitemap: https://example.com/sitemap.xml\n");
        assert!(
            has_clean_sitemap,
            "trailing slash on base_url should be trimmed before joining \
             /sitemap.xml, got: {body}"
        );
        let has_double_slash = body.contains("//sitemap.xml");
        assert!(!has_double_slash, "should not produce double-slash");
    }

    #[test]
    fn does_not_overwrite_existing_robots_txt() {
        let dir = tempdir().unwrap();
        let custom = "User-agent: GPTBot\nDisallow: /\n";
        seed_robots(dir.path(), custom);

        run("https://example.com", dir.path());

        let body = read_robots(dir.path());
        assert_eq!(body, custom, "existing robots.txt must be left untouched");
    }

    #[test]
    fn no_op_when_site_dir_missing() {
        // The site dir is never created; the hook must still succeed.
        let dir = tempdir().unwrap();
        let nonexistent = dir.path().join("nope");
        run("https://example.com", &nonexistent);

        let created = nonexistent.join(ROBOTS_FILE).exists();
        assert!(
            !created,
            "plugin should not create files in a missing site dir"
        );
    }

    #[test]
    fn robots_txt_with_custom_sitemap_url() {
        let dir = tempdir().unwrap();
        run("https://blog.example.org", dir.path());

        let body = read_robots(dir.path());
        assert!(
            body.contains("Sitemap: https://blog.example.org/sitemap.xml"),
            "sitemap URL should use the custom base_url: {body}"
        );
    }

    #[test]
    fn robots_txt_preserves_existing_disallow() {
        let dir = tempdir().unwrap();
        let custom = "User-agent: *\nDisallow: /admin/\nDisallow: /private/\n";
        seed_robots(dir.path(), custom);

        run("https://example.com", dir.path());

        let body = read_robots(dir.path());
        assert_eq!(
            body, custom,
            "existing robots.txt with disallow rules must not be overwritten"
        );
        for rule in ["Disallow: /admin/", "Disallow: /private/"] {
            assert!(body.contains(rule));
        }
    }
}