Skip to main content

ssg/postprocess/
news_sitemap.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! News sitemap fix plugin.
5
6use super::helpers::{read_meta_sidecars, rfc2822_to_iso8601, xml_escape};
7use crate::plugin::{Plugin, PluginContext};
8use anyhow::{Context, Result};
9use std::fs;
10
/// Post-processing plugin that repairs `news-sitemap.xml`.
///
/// When the generated file still contains placeholder values, the plugin
/// rebuilds it from the page metadata returned by `read_meta_sidecars`.
/// The type carries no state.
#[derive(Debug, Clone, Copy)]
pub struct NewsSitemapFixPlugin;
15
16impl Plugin for NewsSitemapFixPlugin {
17    fn name(&self) -> &'static str {
18        "news-sitemap-fix"
19    }
20
21    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
22        let path = ctx.site_dir.join("news-sitemap.xml");
23        if !path.exists() {
24            return Ok(());
25        }
26
27        let content = fs::read_to_string(&path)
28            .with_context(|| format!("cannot read {}", path.display()))?;
29
30        // If no placeholder issues, skip
31        if !content.contains("Unnamed Publication")
32            && !content.contains("Untitled Article")
33            && !content.contains("<loc></loc>")
34        {
35            return Ok(());
36        }
37
38        let meta_entries =
39            read_meta_sidecars(&ctx.site_dir).unwrap_or_default();
40
41        // Get base_url from config
42        let base_url = ctx
43            .config
44            .as_ref()
45            .map(|c| c.base_url.trim_end_matches('/').to_string())
46            .unwrap_or_default();
47
48        // Build news entries from metadata
49        let news_entries: Vec<String> = meta_entries
50            .iter()
51            .filter_map(|(rel_path, meta)| {
52                build_news_entry(rel_path, meta, &base_url)
53            })
54            .collect();
55
56        if news_entries.is_empty() {
57            return Ok(());
58        }
59
60        // Rebuild the news sitemap
61        let rebuilt = format!(
62            r#"<?xml version="1.0" encoding="UTF-8"?>
63<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
64        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
65{}
66</urlset>
67"#,
68            news_entries.join("\n")
69        );
70
71        fs::write(&path, rebuilt)
72            .with_context(|| format!("cannot write {}", path.display()))?;
73
74        log::info!(
75            "[news-sitemap-fix] Rebuilt news-sitemap.xml with {} entries",
76            news_entries.len()
77        );
78        Ok(())
79    }
80}
81
82/// Builds a single `<url>` entry for the news sitemap from metadata.
83fn build_news_entry(
84    rel_path: &str,
85    meta: &std::collections::HashMap<String, String>,
86    base_url: &str,
87) -> Option<String> {
88    let title = meta.get("title").cloned().unwrap_or_default();
89    let name = meta
90        .get("author")
91        .or_else(|| meta.get("name"))
92        .cloned()
93        .unwrap_or_default();
94    let language = meta
95        .get("language")
96        .cloned()
97        .unwrap_or_else(|| "en".to_string());
98
99    if title.is_empty() || rel_path.is_empty() {
100        return None;
101    }
102
103    let pub_date = meta
104        .get("item_pub_date")
105        .map(|d| rfc2822_to_iso8601(d))
106        .unwrap_or_default();
107
108    let loc = if base_url.is_empty() {
109        format!("{rel_path}/index.html")
110    } else {
111        format!("{base_url}/{rel_path}/index.html")
112    };
113
114    let keywords = meta
115        .get("keywords")
116        .or_else(|| meta.get("tags"))
117        .cloned()
118        .unwrap_or_default();
119    let extras = if keywords.is_empty() {
120        String::new()
121    } else {
122        format!(
123            "\n    <news:keywords>{}</news:keywords>",
124            xml_escape(&keywords)
125        )
126    };
127
128    Some(format!(
129        r"<url>
130  <loc>{loc}</loc>
131  <news:news>
132    <news:publication>
133      <news:name>{name}</news:name>
134      <news:language>{language}</news:language>
135    </news:publication>
136    <news:publication_date>{pub_date}</news:publication_date>
137    <news:title>{title}</news:title>{extras}
138  </news:news>
139</url>"
140    ))
141}
142
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {

    use super::*;
    use crate::plugin::PluginContext;
    use std::collections::HashMap;
    use std::path::Path;
    use tempfile::tempdir;

    /// Publication date shared by the fixtures.
    ///
    /// 11 April 2026 is a Saturday; the day name has to agree with the
    /// date or a strict RFC 2822 parser may reject the value.
    const PUB_DATE: &str = "Sat, 11 Apr 2026 06:06:06 +0000";

    /// Placeholder sitemap carrying every marker the plugin looks for.
    const PLACEHOLDER_WITH_PUBLICATION: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
<url>
  <loc></loc>
  <news:news>
    <news:publication>
      <news:name>Unnamed Publication</news:name>
      <news:language>en</news:language>
    </news:publication>
    <news:title>Untitled Article</news:title>
  </news:news>
</url>
</urlset>"#;

    /// Placeholder sitemap with an empty location and a placeholder title.
    const PLACEHOLDER_TITLE_ONLY: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
<url>
  <loc></loc>
  <news:news>
    <news:title>Untitled Article</news:title>
  </news:news>
</url>
</urlset>"#;

    /// Builds a metadata map from `(key, value)` pairs.
    fn meta_of(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Writes `meta` as `<dir>/<slug>/page.meta.json`.
    fn write_meta_sidecar(
        dir: &Path,
        slug: &str,
        meta: &HashMap<String, String>,
    ) {
        let page_dir = dir.join(slug);
        fs::create_dir_all(&page_dir).expect("create page dir");
        let json = serde_json::to_string(meta).expect("serialize meta");
        fs::write(page_dir.join("page.meta.json"), json).expect("write meta");
    }

    /// Plugin context with a site configuration (base URL
    /// `https://example.com`).
    fn configured_ctx(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        let config = crate::cmd::SsgConfig {
            base_url: "https://example.com".to_string(),
            site_name: "Test Site".to_string(),
            site_title: "Test Site".to_string(),
            site_description: "A test site".to_string(),
            language: "en".to_string(),
            content_dir: std::path::PathBuf::from("content"),
            output_dir: std::path::PathBuf::from("build"),
            template_dir: std::path::PathBuf::from("templates"),
            serve_dir: None,
            i18n: None,
        };
        PluginContext::with_config(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
            config,
        )
    }

    /// Plugin context created through `PluginContext::new`, without an
    /// explicit site configuration.
    fn bare_ctx(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
        )
    }

    #[test]
    fn test_news_sitemap_with_keywords() -> Result<()> {
        let tmp = tempdir()?;
        let news_path = tmp.path().join("news-sitemap.xml");
        fs::write(&news_path, PLACEHOLDER_WITH_PUBLICATION)?;

        let meta = meta_of(&[
            ("title", "Breaking News"),
            ("author", "Reporter"),
            ("item_pub_date", PUB_DATE),
            ("keywords", "rust, programming, web"),
            ("language", "fr"),
        ]);
        write_meta_sidecar(tmp.path(), "breaking", &meta);

        NewsSitemapFixPlugin.after_compile(&configured_ctx(tmp.path()))?;

        let result = fs::read_to_string(&news_path)?;
        assert!(
            result.contains(
                "<news:keywords>rust, programming, web</news:keywords>"
            ),
            "Should inject keywords: {result}"
        );
        assert!(
            result.contains("<news:name>Reporter</news:name>"),
            "Should use author name: {result}"
        );
        assert!(
            result.contains("<news:language>fr</news:language>"),
            "Should use custom language: {result}"
        );
        assert!(
            !result.contains("Unnamed Publication"),
            "Should not have placeholder: {result}"
        );
        assert!(
            !result.contains("Untitled Article"),
            "Should not have placeholder: {result}"
        );
        Ok(())
    }

    #[test]
    fn test_news_sitemap_with_tags_fallback() -> Result<()> {
        let tmp = tempdir()?;
        let news_path = tmp.path().join("news-sitemap.xml");
        fs::write(&news_path, PLACEHOLDER_TITLE_ONLY)?;

        let meta = meta_of(&[
            ("title", "Tagged Post"),
            ("author", "Writer"),
            ("item_pub_date", "Mon, 01 Sep 2025 12:00:00 +0000"),
            ("tags", "tech, science"),
        ]);
        write_meta_sidecar(tmp.path(), "tagged", &meta);

        NewsSitemapFixPlugin.after_compile(&configured_ctx(tmp.path()))?;

        let result = fs::read_to_string(&news_path)?;
        assert!(
            result.contains("<news:keywords>tech, science</news:keywords>"),
            "Should fall back to tags for keywords: {result}"
        );
        Ok(())
    }

    #[test]
    fn test_news_sitemap_skips_when_no_placeholders() -> Result<()> {
        let tmp = tempdir()?;
        let news_path = tmp.path().join("news-sitemap.xml");
        let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
  <loc>https://example.com/good</loc>
  <news:news>
    <news:title>Good Article</news:title>
  </news:news>
</url>
</urlset>"#;
        fs::write(&news_path, original)?;

        NewsSitemapFixPlugin.after_compile(&bare_ctx(tmp.path()))?;

        let result = fs::read_to_string(&news_path)?;
        assert_eq!(
            result, original,
            "Should not modify well-formed news sitemap"
        );
        Ok(())
    }

    #[test]
    fn test_build_news_entry_returns_none_for_empty_title() {
        let meta = HashMap::new();
        assert!(
            build_news_entry("slug", &meta, "https://example.com").is_none(),
            "empty title should produce None"
        );
    }

    #[test]
    fn test_build_news_entry_returns_none_for_empty_path() {
        let meta = meta_of(&[("title", "Hello")]);
        assert!(
            build_news_entry("", &meta, "https://example.com").is_none(),
            "empty rel_path should produce None"
        );
    }

    #[test]
    fn test_build_news_entry_valid() {
        let meta = meta_of(&[
            ("title", "My Article"),
            ("author", "Author"),
            ("item_pub_date", PUB_DATE),
        ]);
        let entry =
            build_news_entry("my-article", &meta, "https://example.com")
                .expect("valid metadata should produce an entry");
        assert!(entry
            .contains("<loc>https://example.com/my-article/index.html</loc>"));
        assert!(entry.contains("<news:name>Author</news:name>"));
        assert!(entry.contains("<news:title>My Article</news:title>"));
        assert!(entry.contains("<news:language>en</news:language>"));
    }

    #[test]
    fn test_build_news_entry_without_base_url() {
        let meta = meta_of(&[("title", "Post"), ("name", "Writer")]);
        let entry = build_news_entry("post", &meta, "")
            .expect("should produce entry without base_url");
        assert!(
            entry.contains("<loc>post/index.html</loc>"),
            "loc should use relative path when base_url is empty: {entry}"
        );
        assert!(
            entry.contains("<news:name>Writer</news:name>"),
            "should fall back to 'name' field: {entry}"
        );
    }

    #[test]
    fn test_news_sitemap_no_file_is_noop() -> Result<()> {
        let tmp = tempdir()?;
        NewsSitemapFixPlugin.after_compile(&bare_ctx(tmp.path()))?;
        assert!(!tmp.path().join("news-sitemap.xml").exists());
        Ok(())
    }

    #[test]
    fn test_news_sitemap_empty_entries_no_rebuild() -> Result<()> {
        let tmp = tempdir()?;
        let news_path = tmp.path().join("news-sitemap.xml");
        // Placeholder content, but no sidecars to rebuild from.
        let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<urlset><url><loc></loc><news:news><news:title>Untitled Article</news:title></news:news></url></urlset>"#;
        fs::write(&news_path, original)?;

        NewsSitemapFixPlugin.after_compile(&bare_ctx(tmp.path()))?;

        let result = fs::read_to_string(&news_path)?;
        assert_eq!(
            result, original,
            "should not modify when no meta entries produce valid news entries"
        );
        Ok(())
    }
}