Skip to main content

ssg/postprocess/
rss.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! RSS aggregate plugin.
5
6use super::helpers::{
7    extract_xml_value, parse_rfc2822_lenient, read_meta_sidecars, xml_escape,
8};
9use crate::plugin::{Plugin, PluginContext};
10use anyhow::{Context, Result};
11use std::fs;
12
/// Aggregates per-page RSS items into the root `rss.xml` feed.
///
/// The plugin is stateless: all inputs come from the `PluginContext` passed
/// to its `after_compile` hook. Because it carries no data it is `Copy`,
/// comparable, and constructible through `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RssAggregatePlugin;
16
17/// Builds a list of `(sort_key, xml_item)` pairs from metadata entries.
18fn collect_articles(
19    meta_entries: &[(String, std::collections::HashMap<String, String>)],
20    base_url: &str,
21) -> Vec<(String, String)> {
22    let mut articles: Vec<(String, String)> = Vec::new();
23    for (rel_path, meta) in meta_entries {
24        if rel_path.is_empty() {
25            continue;
26        }
27
28        let title = meta.get("title").cloned().unwrap_or_default();
29        let description = meta.get("description").cloned().unwrap_or_default();
30        let pub_date = meta.get("item_pub_date").cloned().unwrap_or_default();
31        let author = meta.get("author").cloned().unwrap_or_default();
32        let banner = meta.get("banner").or_else(|| meta.get("image")).cloned();
33        let category = meta.get("category").cloned();
34        let tags = meta.get("tags").cloned();
35
36        if title.is_empty() {
37            continue;
38        }
39
40        let link = if base_url.is_empty() {
41            format!("{rel_path}/")
42        } else {
43            format!("{base_url}/{rel_path}/")
44        };
45
46        let sort_key = parse_rfc2822_lenient(&pub_date)
47            .map_or_else(|| pub_date.clone(), |dt| dt.to_rfc3339());
48
49        let escaped_desc = xml_escape(&description);
50
51        // Build optional elements
52        let mut extras = String::new();
53
54        // Enclosure for banner/image (P2 fix)
55        if let Some(ref img) = banner {
56            let img_url = if img.starts_with("http") {
57                img.clone()
58            } else if !base_url.is_empty() {
59                format!("{base_url}/{}", img.trim_start_matches('/'))
60            } else {
61                img.clone()
62            };
63            let mime = if img_url.ends_with(".webp") {
64                "image/webp"
65            } else if img_url.ends_with(".png") {
66                "image/png"
67            } else {
68                "image/jpeg"
69            };
70            extras.push_str(&format!(
71                "\n      <enclosure url=\"{img_url}\" type=\"{mime}\" length=\"0\"/>"
72            ));
73        }
74
75        // Category elements (P2 fix)
76        if let Some(ref cat) = category {
77            extras.push_str(&format!(
78                "\n      <category>{}</category>",
79                xml_escape(cat)
80            ));
81        }
82        if let Some(ref t) = tags {
83            for tag in t.split(',') {
84                let tag = tag.trim();
85                if !tag.is_empty() {
86                    extras.push_str(&format!(
87                        "\n      <category>{}</category>",
88                        xml_escape(tag)
89                    ));
90                }
91            }
92        }
93
94        let item = format!(
95            r#"    <item>
96      <title>{title}</title>
97      <link>{link}</link>
98      <description>{escaped_desc}</description>
99      <guid isPermaLink="true">{link}</guid>
100      <pubDate>{pub_date}</pubDate>
101      <author>{author}</author>{extras}
102    </item>"#
103        );
104
105        articles.push((sort_key, item));
106    }
107    articles
108}
109
110/// Formats the final RSS XML channel document.
111fn build_rss_channel(
112    channel_title: &str,
113    channel_link: &str,
114    channel_desc: &str,
115    base_url: &str,
116    language: &str,
117    last_build_date: &str,
118    copyright: &str,
119    items_xml: &str,
120) -> String {
121    let mut channel_extras = String::new();
122    if !language.is_empty() {
123        channel_extras
124            .push_str(&format!("\n    <language>{language}</language>"));
125    }
126    if !last_build_date.is_empty() {
127        channel_extras.push_str(&format!(
128            "\n    <lastBuildDate>{last_build_date}</lastBuildDate>"
129        ));
130    }
131    if !copyright.is_empty() {
132        channel_extras.push_str(&format!(
133            "\n    <copyright>{}</copyright>",
134            xml_escape(copyright)
135        ));
136    }
137
138    format!(
139        r#"<?xml version="1.0" encoding="UTF-8"?>
140<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
141  <channel>
142    <title>{channel_title}</title>
143    <link>{channel_link}</link>
144    <description>{channel_desc}</description>
145    <atom:link href="{base_url}/rss.xml" rel="self" type="application/rss+xml"/>{channel_extras}
146{items_xml}
147  </channel>
148</rss>
149"#
150    )
151}
152
153impl Plugin for RssAggregatePlugin {
154    fn name(&self) -> &'static str {
155        "rss-aggregate"
156    }
157
158    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
159        let rss_path = ctx.site_dir.join("rss.xml");
160        if !rss_path.exists() {
161            return Ok(());
162        }
163
164        let content = fs::read_to_string(&rss_path)
165            .with_context(|| format!("cannot read {}", rss_path.display()))?;
166
167        if content.matches("<item>").count() > 1 {
168            return Ok(());
169        }
170
171        let meta_entries =
172            read_meta_sidecars(&ctx.site_dir).unwrap_or_default();
173
174        let base_url = ctx
175            .config
176            .as_ref()
177            .map(|c| c.base_url.trim_end_matches('/').to_string())
178            .unwrap_or_default();
179
180        let language = extract_language(ctx);
181        let copyright = extract_copyright(&meta_entries);
182
183        let mut articles = collect_articles(&meta_entries, &base_url);
184        articles.sort_by(|a, b| b.0.cmp(&a.0));
185        articles.truncate(50);
186
187        if articles.is_empty() {
188            return Ok(());
189        }
190
191        let last_build_date = extract_last_build_date(&articles);
192
193        let items_xml: String = articles
194            .iter()
195            .map(|(_, xml)| xml.as_str())
196            .collect::<Vec<_>>()
197            .join("\n");
198
199        let channel_title = extract_xml_value(&content, "title")
200            .unwrap_or_else(|| "Untitled".to_string());
201        let channel_link = extract_xml_value(&content, "link")
202            .unwrap_or_else(|| base_url.clone());
203        let channel_desc =
204            extract_xml_value(&content, "description").unwrap_or_default();
205
206        let rebuilt = build_rss_channel(
207            &channel_title,
208            &channel_link,
209            &channel_desc,
210            &base_url,
211            &language,
212            &last_build_date,
213            &copyright,
214            &items_xml,
215        );
216
217        fs::write(&rss_path, rebuilt)
218            .with_context(|| format!("cannot write {}", rss_path.display()))?;
219
220        log::info!(
221            "[rss-aggregate] Rebuilt rss.xml with {} article items",
222            articles.len()
223        );
224        Ok(())
225    }
226}
227
228/// Extracts the language setting from the plugin context.
229fn extract_language(ctx: &PluginContext) -> String {
230    ctx.config
231        .as_ref()
232        .and_then(|c| {
233            if c.site_name.is_empty() {
234                None
235            } else {
236                Some("en".to_string())
237            }
238        })
239        .unwrap_or_else(|| "en".to_string())
240}
241
/// Returns the first `copyright` value found among the metadata entries.
///
/// Entries are scanned in slice order and the first map that contains a
/// `copyright` key wins, even when its value is empty. Yields an empty
/// string when no entry has the key.
fn extract_copyright(
    meta_entries: &[(String, std::collections::HashMap<String, String>)],
) -> String {
    for (_, meta) in meta_entries {
        if let Some(notice) = meta.get("copyright") {
            return notice.clone();
        }
    }
    String::new()
}
251
/// Pulls the publication date out of the first article's item markup.
///
/// `articles` holds `(sort_key, xml_item)` pairs; only the first pair is
/// inspected. The result is the text between the first `<pubDate>` and the
/// next `</pubDate>` in that item. An empty string is returned when the
/// slice is empty or the first item has no complete `<pubDate>` element.
fn extract_last_build_date(articles: &[(String, String)]) -> String {
    let first_item = match articles.first() {
        Some((_, xml)) => xml.as_str(),
        None => return String::new(),
    };

    first_item
        .split_once("<pubDate>")
        .and_then(|(_, rest)| rest.split_once("</pubDate>"))
        .map_or_else(String::new, |(date, _)| date.to_owned())
}
264
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {

    use super::*;
    use crate::plugin::PluginContext;
    use std::collections::HashMap;
    use std::path::Path;
    use tempfile::tempdir;

    /// One-line feed holding a single placeholder item, i.e. a feed the
    /// plugin is willing to rebuild.
    const PLACEHOLDER_FEED: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>T</title><link>https://example.com</link><description>D</description><item><title>X</title></item></channel></rss>"#;

    /// Builds an owned metadata map from borrowed key/value pairs.
    fn meta_from(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Writes `meta` as `<dir>/<slug>/page.meta.json`.
    fn write_meta_sidecar(
        dir: &Path,
        slug: &str,
        meta: &HashMap<String, String>,
    ) {
        let page_dir = dir.join(slug);
        fs::create_dir_all(&page_dir).expect("create page dir");
        let meta_path = page_dir.join("page.meta.json");
        let json = serde_json::to_string(meta).expect("serialize meta");
        fs::write(&meta_path, json).expect("write meta");
    }

    /// Context carrying a site config whose base URL is
    /// `https://example.com`.
    fn ctx_with_config(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        let config = crate::cmd::SsgConfig {
            base_url: "https://example.com".to_string(),
            site_name: "Test Site".to_string(),
            site_title: "Test Site".to_string(),
            site_description: "A test site".to_string(),
            language: "en".to_string(),
            content_dir: std::path::PathBuf::from("content"),
            output_dir: std::path::PathBuf::from("build"),
            template_dir: std::path::PathBuf::from("templates"),
            serve_dir: None,
            i18n: None,
        };
        PluginContext::with_config(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
            config,
        )
    }

    /// Context without any site config.
    fn test_ctx(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
        )
    }

    #[test]
    fn test_rss_aggregate_single_item_trigger() -> Result<()> {
        let tmp = tempdir()?;
        let rss_path = tmp.path().join("rss.xml");
        let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>My Site</title>
    <link>https://example.com</link>
    <description>A test site</description>
    <item>
      <title>Feed itself</title>
      <link>https://example.com/rss.xml</link>
    </item>
  </channel>
</rss>"#;
        fs::write(&rss_path, original)?;

        let ctx = test_ctx(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        // No sidecars exist, so there is nothing to aggregate and the
        // plugin must leave the feed untouched.
        let result = fs::read_to_string(&rss_path)?;
        assert_eq!(
            result, original,
            "feed should be unchanged when no articles are found"
        );
        Ok(())
    }

    #[test]
    fn test_rss_aggregate_with_full_metadata() -> Result<()> {
        let tmp = tempdir()?;

        let rss_path = tmp.path().join("rss.xml");
        fs::write(
            &rss_path,
            r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Test Blog</title>
    <link>https://example.com</link>
    <description>A test blog</description>
    <item>
      <title>Placeholder</title>
    </item>
  </channel>
</rss>"#,
        )?;

        let meta = meta_from(&[
            ("title", "Article One"),
            ("description", "First article desc"),
            ("item_pub_date", "Thu, 11 Apr 2026 06:06:06 +0000"),
            ("author", "Alice"),
            ("banner", "/images/banner.webp"),
            ("category", "Technology"),
            ("tags", "rust, web"),
            ("copyright", "Copyright 2026 Alice"),
        ]);
        write_meta_sidecar(tmp.path(), "article-one", &meta);

        let ctx = ctx_with_config(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&rss_path)?;

        assert!(
            result.contains(
                "<enclosure url=\"https://example.com/images/banner.webp\""
            ),
            "Should have enclosure with base_url prefix: {result}"
        );
        assert!(
            result.contains("type=\"image/webp\""),
            "Should detect webp MIME type: {result}"
        );
        assert!(
            result.contains("<category>Technology</category>"),
            "Should have category element: {result}"
        );
        assert!(
            result.contains("<category>rust</category>"),
            "Should have tag category 'rust': {result}"
        );
        assert!(
            result.contains("<category>web</category>"),
            "Should have tag category 'web': {result}"
        );
        assert!(
            result.contains("<language>en</language>"),
            "Should have language element: {result}"
        );
        assert!(
            result.contains("<lastBuildDate>"),
            "Should have lastBuildDate: {result}"
        );
        assert!(
            result.contains("<copyright>Copyright 2026 Alice</copyright>"),
            "Should have copyright: {result}"
        );

        Ok(())
    }

    #[test]
    fn test_rss_aggregate_banner_with_image_field() -> Result<()> {
        let tmp = tempdir()?;

        let rss_path = tmp.path().join("rss.xml");
        fs::write(&rss_path, PLACEHOLDER_FEED)?;

        let meta = meta_from(&[
            ("title", "Image Test"),
            ("description", "Testing image"),
            ("item_pub_date", "Mon, 01 Sep 2025 12:00:00 +0000"),
            ("author", "Bob"),
            ("image", "https://cdn.example.com/photo.png"),
        ]);
        write_meta_sidecar(tmp.path(), "img-test", &meta);

        let ctx = ctx_with_config(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&rss_path)?;
        assert!(
            result.contains("url=\"https://cdn.example.com/photo.png\""),
            "Should use absolute image URL as-is: {result}"
        );
        assert!(
            result.contains("type=\"image/png\""),
            "Should detect png MIME type: {result}"
        );
        Ok(())
    }

    #[test]
    fn test_rss_aggregate_jpeg_mime() -> Result<()> {
        let tmp = tempdir()?;

        let rss_path = tmp.path().join("rss.xml");
        fs::write(&rss_path, PLACEHOLDER_FEED)?;

        let meta = meta_from(&[
            ("title", "JPEG Test"),
            ("description", "desc"),
            ("item_pub_date", "Mon, 01 Sep 2025 12:00:00 +0000"),
            ("author", "Carol"),
            ("banner", "/img/photo.jpg"),
        ]);
        write_meta_sidecar(tmp.path(), "jpeg-test", &meta);

        let ctx = ctx_with_config(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&rss_path)?;
        assert!(
            result.contains("type=\"image/jpeg\""),
            "Should default to image/jpeg for .jpg: {result}"
        );
        Ok(())
    }

    #[test]
    fn test_rss_aggregate_skips_multi_item() -> Result<()> {
        let tmp = tempdir()?;

        let rss_path = tmp.path().join("rss.xml");
        let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>T</title><link>x</link><description>D</description>
<item><title>A</title></item>
<item><title>B</title></item>
</channel></rss>"#;
        fs::write(&rss_path, original)?;

        let ctx = test_ctx(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&rss_path)?;
        assert_eq!(result, original, "Should not modify feed with >1 items");
        Ok(())
    }

    #[test]
    fn test_collect_articles_empty_entries() {
        let articles = collect_articles(&[], "https://example.com");
        assert!(
            articles.is_empty(),
            "no meta entries should produce no articles"
        );
    }

    #[test]
    fn test_collect_articles_skips_empty_title() {
        let meta = meta_from(&[("description", "no title here")]);
        let entries = vec![("page".to_string(), meta)];
        let articles = collect_articles(&entries, "https://example.com");
        assert!(
            articles.is_empty(),
            "entries without title should be skipped"
        );
    }

    #[test]
    fn test_collect_articles_skips_empty_path() {
        let meta = meta_from(&[("title", "Has Title")]);
        let entries = vec![(String::new(), meta)];
        let articles = collect_articles(&entries, "https://example.com");
        assert!(
            articles.is_empty(),
            "entries with empty path should be skipped"
        );
    }

    #[test]
    fn test_collect_articles_multiple_entries_sorted() {
        let older = meta_from(&[
            ("title", "Older"),
            ("description", "old"),
            ("item_pub_date", "Mon, 01 Jan 2024 00:00:00 +0000"),
            ("author", "A"),
        ]);
        let newer = meta_from(&[
            ("title", "Newer"),
            ("description", "new"),
            ("item_pub_date", "Wed, 01 Jan 2025 00:00:00 +0000"),
            ("author", "B"),
        ]);

        let entries = vec![
            ("old-post".to_string(), older),
            ("new-post".to_string(), newer),
        ];
        let mut articles = collect_articles(&entries, "https://example.com");
        assert_eq!(articles.len(), 2);

        // Sort descending like the plugin does
        articles.sort_by(|a, b| b.0.cmp(&a.0));
        assert!(
            articles[0].1.contains("<title>Newer</title>"),
            "newest article should sort first"
        );
    }

    #[test]
    fn test_collect_articles_xml_escapes_description() {
        let meta = meta_from(&[
            ("title", "Escape Test"),
            ("description", "Use <b>bold</b> & \"quotes\""),
            ("author", "X"),
        ]);
        let entries = vec![("esc".to_string(), meta)];
        let articles = collect_articles(&entries, "");
        assert_eq!(articles.len(), 1);
        let xml = &articles[0].1;
        assert!(
            xml.contains("&lt;b&gt;bold&lt;/b&gt;"),
            "angle brackets should be escaped: {xml}"
        );
        assert!(xml.contains("&amp;"), "ampersands should be escaped: {xml}");
    }

    #[test]
    fn test_build_rss_channel_minimal() {
        let result = build_rss_channel(
            "Title",
            "https://x.example",
            "Desc",
            "https://x.example",
            "",
            "",
            "",
            "",
        );
        assert!(result.contains("<title>Title</title>"));
        assert!(result.contains("<link>https://x.example</link>"));
        assert!(result.contains("<description>Desc</description>"));
        assert!(
            !result.contains("<language>"),
            "no language when empty string supplied"
        );
        assert!(
            !result.contains("<lastBuildDate>"),
            "no lastBuildDate when empty string supplied"
        );
        assert!(
            !result.contains("<copyright>"),
            "no copyright when empty string supplied"
        );
    }

    #[test]
    fn test_build_rss_channel_with_all_extras() {
        let result = build_rss_channel(
            "T",
            "L",
            "D",
            "https://x.example",
            "en",
            "Mon, 01 Jan 2024 00:00:00 +0000",
            "Copyright 2024 X",
            "<item><title>A</title></item>",
        );
        assert!(result.contains("<language>en</language>"));
        assert!(result.contains(
            "<lastBuildDate>Mon, 01 Jan 2024 00:00:00 +0000</lastBuildDate>"
        ));
        assert!(result.contains("<copyright>Copyright 2024 X</copyright>"));
        assert!(result.contains("<item><title>A</title></item>"));
    }

    #[test]
    fn test_extract_last_build_date_from_articles() {
        let articles = vec![
            (
                "2025".to_string(),
                "<item><pubDate>Mon, 01 Sep 2025 12:00:00 +0000</pubDate></item>"
                    .to_string(),
            ),
            (
                "2024".to_string(),
                "<item><pubDate>Mon, 01 Jan 2024 00:00:00 +0000</pubDate></item>"
                    .to_string(),
            ),
        ];
        let date = extract_last_build_date(&articles);
        assert_eq!(date, "Mon, 01 Sep 2025 12:00:00 +0000");
    }

    #[test]
    fn test_extract_last_build_date_empty() {
        let articles: Vec<(String, String)> = vec![];
        let date = extract_last_build_date(&articles);
        assert!(date.is_empty());
    }

    #[test]
    fn test_rss_no_file_is_noop() -> Result<()> {
        let tmp = tempdir()?;
        // No rss.xml exists
        let ctx = test_ctx(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;
        assert!(!tmp.path().join("rss.xml").exists());
        Ok(())
    }
}
683}