1use crate::plugin::{Plugin, PluginContext};
7use anyhow::{Context, Result};
8use std::fs;
9
/// Plugin that generates a default `robots.txt` for the compiled site.
///
/// The stored base URL is used to build the `Sitemap:` line of the
/// generated file.
#[derive(Clone, Debug)]
pub struct RobotsPlugin {
    /// Base URL of the site, kept exactly as it was passed to the constructor.
    base_url: String,
}

impl RobotsPlugin {
    /// Creates a plugin for the site served at `base_url`.
    ///
    /// Accepts anything convertible into a `String` (for example `&str` or
    /// `String`). The value is stored as given, without any normalisation.
    pub fn new(base_url: impl Into<String>) -> Self {
        let base_url = base_url.into();
        Self { base_url }
    }
}
38
39impl Plugin for RobotsPlugin {
40 fn name(&self) -> &'static str {
41 "robots"
42 }
43
44 fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
45 if !ctx.site_dir.exists() {
46 return Ok(());
47 }
48
49 let robots_path = ctx.site_dir.join("robots.txt");
50 if robots_path.exists() {
51 return Ok(());
52 }
53
54 let content = format!(
55 "User-agent: *\nAllow: /\nSitemap: {}/sitemap.xml\n",
56 self.base_url.trim_end_matches('/')
57 );
58
59 fs::write(&robots_path, content).with_context(|| {
60 format!("cannot write {}", robots_path.display())
61 })?;
62
63 Ok(())
64 }
65}
66
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    //! Unit tests for the robots plugin, run against temporary directories.

    use super::*;
    use std::path::Path;
    use tempfile::tempdir;

    /// Builds a plugin context whose site directory is `site`; the other
    /// three paths are fixed placeholder names.
    fn ctx(site: &Path) -> PluginContext {
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site,
            Path::new("templates"),
        )
    }

    /// Runs `after_compile` for a plugin configured with `base_url`,
    /// panicking if the hook reports an error.
    fn generate(base_url: &str, site: &Path) {
        RobotsPlugin::new(base_url)
            .after_compile(&ctx(site))
            .unwrap();
    }

    /// Reads `robots.txt` from `site`, panicking if it cannot be read.
    fn read_robots(site: &Path) -> String {
        fs::read_to_string(site.join("robots.txt")).unwrap()
    }

    #[test]
    fn name_is_stable() {
        let plugin = RobotsPlugin::new("https://x.example");
        assert_eq!(plugin.name(), "robots");
    }

    #[test]
    fn new_accepts_string_or_str() {
        // Compile-time check: both argument types must be accepted.
        let _from_str = RobotsPlugin::new("https://a.example");
        let _from_string = RobotsPlugin::new(String::from("https://b.example"));
    }

    #[test]
    fn writes_robots_txt_when_missing() {
        let site = tempdir().unwrap();
        generate("https://example.com", site.path());

        assert_eq!(
            read_robots(site.path()),
            "User-agent: *\nAllow: /\nSitemap: https://example.com/sitemap.xml\n"
        );
    }

    #[test]
    fn trims_trailing_slash_from_base_url() {
        let site = tempdir().unwrap();
        generate("https://example.com/", site.path());

        let body = read_robots(site.path());
        assert!(
            body.contains("Sitemap: https://example.com/sitemap.xml\n"),
            "trailing slash on base_url should be trimmed before joining \
             /sitemap.xml, got: {body}"
        );
        assert!(
            !body.contains("//sitemap.xml"),
            "should not produce double-slash"
        );
    }

    #[test]
    fn does_not_overwrite_existing_robots_txt() {
        let site = tempdir().unwrap();
        let custom = "User-agent: GPTBot\nDisallow: /\n";
        fs::write(site.path().join("robots.txt"), custom).unwrap();

        generate("https://example.com", site.path());

        assert_eq!(
            read_robots(site.path()),
            custom,
            "existing robots.txt must be left untouched"
        );
    }

    #[test]
    fn no_op_when_site_dir_missing() {
        let parent = tempdir().unwrap();
        let nonexistent = parent.path().join("nope");

        generate("https://example.com", &nonexistent);

        assert!(
            !nonexistent.join("robots.txt").exists(),
            "plugin should not create files in a missing site dir"
        );
    }

    #[test]
    fn robots_txt_with_custom_sitemap_url() {
        let site = tempdir().unwrap();
        generate("https://blog.example.org", site.path());

        let body = read_robots(site.path());
        assert!(
            body.contains("Sitemap: https://blog.example.org/sitemap.xml"),
            "sitemap URL should use the custom base_url: {body}"
        );
    }

    #[test]
    fn robots_txt_preserves_existing_disallow() {
        let site = tempdir().unwrap();
        let custom = "User-agent: *\nDisallow: /admin/\nDisallow: /private/\n";
        fs::write(site.path().join("robots.txt"), custom).unwrap();

        generate("https://example.com", site.path());

        let body = read_robots(site.path());
        assert_eq!(
            body, custom,
            "existing robots.txt with disallow rules must not be overwritten"
        );
        for rule in ["Disallow: /admin/", "Disallow: /private/"] {
            assert!(body.contains(rule));
        }
    }
}
185}