1use super::helpers::{read_meta_sidecars, rfc2822_to_iso8601, xml_escape};
7use crate::plugin::{Plugin, PluginContext};
8use anyhow::{Context, Result};
9use std::fs;
10
/// Plugin that repairs a generated `news-sitemap.xml`.
///
/// The type carries no state; all work happens in its `Plugin`
/// implementation, which rebuilds the sitemap from page metadata when the
/// generated file still contains placeholder values.
#[derive(Clone, Copy, Debug)]
pub struct NewsSitemapFixPlugin;
15
16impl Plugin for NewsSitemapFixPlugin {
17 fn name(&self) -> &'static str {
18 "news-sitemap-fix"
19 }
20
21 fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
22 let path = ctx.site_dir.join("news-sitemap.xml");
23 if !path.exists() {
24 return Ok(());
25 }
26
27 let content = fs::read_to_string(&path)
28 .with_context(|| format!("cannot read {}", path.display()))?;
29
30 if !content.contains("Unnamed Publication")
32 && !content.contains("Untitled Article")
33 && !content.contains("<loc></loc>")
34 {
35 return Ok(());
36 }
37
38 let meta_entries =
39 read_meta_sidecars(&ctx.site_dir).unwrap_or_default();
40
41 let base_url = ctx
43 .config
44 .as_ref()
45 .map(|c| c.base_url.trim_end_matches('/').to_string())
46 .unwrap_or_default();
47
48 let news_entries: Vec<String> = meta_entries
50 .iter()
51 .filter_map(|(rel_path, meta)| {
52 build_news_entry(rel_path, meta, &base_url)
53 })
54 .collect();
55
56 if news_entries.is_empty() {
57 return Ok(());
58 }
59
60 let rebuilt = format!(
62 r#"<?xml version="1.0" encoding="UTF-8"?>
63<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
64 xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
65{}
66</urlset>
67"#,
68 news_entries.join("\n")
69 );
70
71 fs::write(&path, rebuilt)
72 .with_context(|| format!("cannot write {}", path.display()))?;
73
74 log::info!(
75 "[news-sitemap-fix] Rebuilt news-sitemap.xml with {} entries",
76 news_entries.len()
77 );
78 Ok(())
79 }
80}
81
82fn build_news_entry(
84 rel_path: &str,
85 meta: &std::collections::HashMap<String, String>,
86 base_url: &str,
87) -> Option<String> {
88 let title = meta.get("title").cloned().unwrap_or_default();
89 let name = meta
90 .get("author")
91 .or_else(|| meta.get("name"))
92 .cloned()
93 .unwrap_or_default();
94 let language = meta
95 .get("language")
96 .cloned()
97 .unwrap_or_else(|| "en".to_string());
98
99 if title.is_empty() || rel_path.is_empty() {
100 return None;
101 }
102
103 let pub_date = meta
104 .get("item_pub_date")
105 .map(|d| rfc2822_to_iso8601(d))
106 .unwrap_or_default();
107
108 let loc = if base_url.is_empty() {
109 format!("{rel_path}/index.html")
110 } else {
111 format!("{base_url}/{rel_path}/index.html")
112 };
113
114 let keywords = meta
115 .get("keywords")
116 .or_else(|| meta.get("tags"))
117 .cloned()
118 .unwrap_or_default();
119 let extras = if keywords.is_empty() {
120 String::new()
121 } else {
122 format!(
123 "\n <news:keywords>{}</news:keywords>",
124 xml_escape(&keywords)
125 )
126 };
127
128 Some(format!(
129 r"<url>
130 <loc>{loc}</loc>
131 <news:news>
132 <news:publication>
133 <news:name>{name}</news:name>
134 <news:language>{language}</news:language>
135 </news:publication>
136 <news:publication_date>{pub_date}</news:publication_date>
137 <news:title>{title}</news:title>{extras}
138 </news:news>
139</url>"
140 ))
141}
142
#[cfg(test)]
// Test fixtures may panic on setup failure; `unwrap`/`expect` keep them terse.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {

    use super::*;
    use crate::plugin::PluginContext;
    use std::collections::HashMap;
    use std::path::Path;
    use tempfile::tempdir;

    /// Builds an owned metadata map from borrowed key/value pairs.
    fn meta_from(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Writes `meta` as JSON to `<dir>/<slug>/page.meta.json`, creating the
    /// page directory if needed.
    fn write_meta_sidecar(
        dir: &Path,
        slug: &str,
        meta: &HashMap<String, String>,
    ) {
        let page_dir = dir.join(slug);
        fs::create_dir_all(&page_dir).expect("create page dir");
        let meta_path = page_dir.join("page.meta.json");
        let json = serde_json::to_string(meta).expect("serialize meta");
        fs::write(&meta_path, json).expect("write meta");
    }

    /// Creates a plugin context for `site_dir` that carries a site config
    /// with base URL `https://example.com`.
    fn ctx_with_config(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        let config = crate::cmd::SsgConfig {
            base_url: "https://example.com".to_string(),
            site_name: "Test Site".to_string(),
            site_title: "Test Site".to_string(),
            site_description: "A test site".to_string(),
            language: "en".to_string(),
            content_dir: std::path::PathBuf::from("content"),
            output_dir: std::path::PathBuf::from("build"),
            template_dir: std::path::PathBuf::from("templates"),
            serve_dir: None,
            i18n: None,
        };
        PluginContext::with_config(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
            config,
        )
    }

    /// Creates a plugin context for `site_dir` without a site config.
    fn test_ctx(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
        )
    }

    /// A placeholder sitemap is rebuilt with author, language and keywords
    /// taken from the page's metadata sidecar.
    #[test]
    fn test_news_sitemap_with_keywords() -> Result<()> {
        let tmp = tempdir()?;

        let news_path = tmp.path().join("news-sitemap.xml");
        fs::write(
            &news_path,
            r#"<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
 xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
<url>
 <loc></loc>
 <news:news>
 <news:publication>
 <news:name>Unnamed Publication</news:name>
 <news:language>en</news:language>
 </news:publication>
 <news:title>Untitled Article</news:title>
 </news:news>
</url>
</urlset>"#,
        )?;

        // 11 Apr 2026 is a Saturday; the weekday has to match the date so
        // that strict RFC 2822 parsers accept the fixture.
        let meta = meta_from(&[
            ("title", "Breaking News"),
            ("author", "Reporter"),
            ("item_pub_date", "Sat, 11 Apr 2026 06:06:06 +0000"),
            ("keywords", "rust, programming, web"),
            ("language", "fr"),
        ]);
        write_meta_sidecar(tmp.path(), "breaking", &meta);

        let ctx = ctx_with_config(tmp.path());
        NewsSitemapFixPlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&news_path)?;
        assert!(
            result.contains(
                "<news:keywords>rust, programming, web</news:keywords>"
            ),
            "Should inject keywords: {result}"
        );
        assert!(
            result.contains("<news:name>Reporter</news:name>"),
            "Should use author name: {result}"
        );
        assert!(
            result.contains("<news:language>fr</news:language>"),
            "Should use custom language: {result}"
        );
        assert!(
            !result.contains("Unnamed Publication"),
            "Should not have placeholder: {result}"
        );
        assert!(
            !result.contains("Untitled Article"),
            "Should not have placeholder: {result}"
        );
        Ok(())
    }

    /// When `keywords` is absent, the `tags` metadata value is used for
    /// `<news:keywords>`.
    #[test]
    fn test_news_sitemap_with_tags_fallback() -> Result<()> {
        let tmp = tempdir()?;

        let news_path = tmp.path().join("news-sitemap.xml");
        fs::write(
            &news_path,
            r#"<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
 xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
<url>
 <loc></loc>
 <news:news>
 <news:title>Untitled Article</news:title>
 </news:news>
</url>
</urlset>"#,
        )?;

        let meta = meta_from(&[
            ("title", "Tagged Post"),
            ("author", "Writer"),
            ("item_pub_date", "Mon, 01 Sep 2025 12:00:00 +0000"),
            ("tags", "tech, science"),
        ]);
        write_meta_sidecar(tmp.path(), "tagged", &meta);

        let ctx = ctx_with_config(tmp.path());
        NewsSitemapFixPlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&news_path)?;
        assert!(
            result.contains("<news:keywords>tech, science</news:keywords>"),
            "Should fall back to tags for keywords: {result}"
        );
        Ok(())
    }

    /// A sitemap without any placeholder marker is left byte-for-byte
    /// unchanged.
    #[test]
    fn test_news_sitemap_skips_when_no_placeholders() -> Result<()> {
        let tmp = tempdir()?;

        let news_path = tmp.path().join("news-sitemap.xml");
        let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
 <loc>https://example.com/good</loc>
 <news:news>
 <news:title>Good Article</news:title>
 </news:news>
</url>
</urlset>"#;
        fs::write(&news_path, original)?;

        let ctx = test_ctx(tmp.path());
        NewsSitemapFixPlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&news_path)?;
        assert_eq!(
            result, original,
            "Should not modify well-formed news sitemap"
        );
        Ok(())
    }

    /// Metadata without a title produces no entry.
    #[test]
    fn test_build_news_entry_returns_none_for_empty_title() {
        let meta = HashMap::new();
        assert!(
            build_news_entry("slug", &meta, "https://example.com").is_none(),
            "empty title should produce None"
        );
    }

    /// An empty relative path produces no entry even when a title exists.
    #[test]
    fn test_build_news_entry_returns_none_for_empty_path() {
        let meta = meta_from(&[("title", "Hello")]);
        assert!(
            build_news_entry("", &meta, "https://example.com").is_none(),
            "empty rel_path should produce None"
        );
    }

    /// Complete metadata yields an entry with an absolute `<loc>`, the
    /// author as publication name, and the default language `en`.
    #[test]
    fn test_build_news_entry_valid() {
        // 11 Apr 2026 is a Saturday; keep weekday and date consistent.
        let meta = meta_from(&[
            ("title", "My Article"),
            ("author", "Author"),
            ("item_pub_date", "Sat, 11 Apr 2026 06:06:06 +0000"),
        ]);
        let entry =
            build_news_entry("my-article", &meta, "https://example.com")
                .expect("valid metadata should produce an entry");
        assert!(entry
            .contains("<loc>https://example.com/my-article/index.html</loc>"));
        assert!(entry.contains("<news:name>Author</news:name>"));
        assert!(entry.contains("<news:title>My Article</news:title>"));
        assert!(entry.contains("<news:language>en</news:language>"));
    }

    /// With an empty base URL the `<loc>` is relative, and `name` is used
    /// when `author` is absent.
    #[test]
    fn test_build_news_entry_without_base_url() {
        let meta = meta_from(&[("title", "Post"), ("name", "Writer")]);
        let entry = build_news_entry("post", &meta, "")
            .expect("should produce entry without base_url");
        assert!(
            entry.contains("<loc>post/index.html</loc>"),
            "loc should use relative path when base_url is empty: {entry}"
        );
        assert!(
            entry.contains("<news:name>Writer</news:name>"),
            "should fall back to 'name' field: {entry}"
        );
    }

    /// A missing sitemap is not an error and no file is created.
    #[test]
    fn test_news_sitemap_no_file_is_noop() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        NewsSitemapFixPlugin.after_compile(&ctx)?;
        assert!(!tmp.path().join("news-sitemap.xml").exists());
        Ok(())
    }

    /// A placeholder sitemap is kept as-is when no sidecar provides a
    /// usable entry.
    #[test]
    fn test_news_sitemap_empty_entries_no_rebuild() -> Result<()> {
        let tmp = tempdir()?;
        let news_path = tmp.path().join("news-sitemap.xml");
        let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<urlset><url><loc></loc><news:news><news:title>Untitled Article</news:title></news:news></url></urlset>"#;
        fs::write(&news_path, original)?;

        let ctx = test_ctx(tmp.path());
        NewsSitemapFixPlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&news_path)?;
        assert_eq!(
            result, original,
            "should not modify when no meta entries produce valid news entries"
        );
        Ok(())
    }
}