1use super::helpers::{
7 extract_xml_value, parse_rfc2822_lenient, read_meta_sidecars, xml_escape,
8};
9use crate::plugin::{Plugin, PluginContext};
10use anyhow::{Context, Result};
11use std::fs;
12
/// Plugin that rebuilds `rss.xml` from per-page metadata after compilation.
///
/// The type is a stateless marker; its behaviour is provided by its
/// [`Plugin`] implementation.
#[derive(Clone, Copy, Debug)]
pub struct RssAggregatePlugin;
16
17fn collect_articles(
19 meta_entries: &[(String, std::collections::HashMap<String, String>)],
20 base_url: &str,
21) -> Vec<(String, String)> {
22 let mut articles: Vec<(String, String)> = Vec::new();
23 for (rel_path, meta) in meta_entries {
24 if rel_path.is_empty() {
25 continue;
26 }
27
28 let title = meta.get("title").cloned().unwrap_or_default();
29 let description = meta.get("description").cloned().unwrap_or_default();
30 let pub_date = meta.get("item_pub_date").cloned().unwrap_or_default();
31 let author = meta.get("author").cloned().unwrap_or_default();
32 let banner = meta.get("banner").or_else(|| meta.get("image")).cloned();
33 let category = meta.get("category").cloned();
34 let tags = meta.get("tags").cloned();
35
36 if title.is_empty() {
37 continue;
38 }
39
40 let link = if base_url.is_empty() {
41 format!("{rel_path}/")
42 } else {
43 format!("{base_url}/{rel_path}/")
44 };
45
46 let sort_key = parse_rfc2822_lenient(&pub_date)
47 .map_or_else(|| pub_date.clone(), |dt| dt.to_rfc3339());
48
49 let escaped_desc = xml_escape(&description);
50
51 let mut extras = String::new();
53
54 if let Some(ref img) = banner {
56 let img_url = if img.starts_with("http") {
57 img.clone()
58 } else if !base_url.is_empty() {
59 format!("{base_url}/{}", img.trim_start_matches('/'))
60 } else {
61 img.clone()
62 };
63 let mime = if img_url.ends_with(".webp") {
64 "image/webp"
65 } else if img_url.ends_with(".png") {
66 "image/png"
67 } else {
68 "image/jpeg"
69 };
70 extras.push_str(&format!(
71 "\n <enclosure url=\"{img_url}\" type=\"{mime}\" length=\"0\"/>"
72 ));
73 }
74
75 if let Some(ref cat) = category {
77 extras.push_str(&format!(
78 "\n <category>{}</category>",
79 xml_escape(cat)
80 ));
81 }
82 if let Some(ref t) = tags {
83 for tag in t.split(',') {
84 let tag = tag.trim();
85 if !tag.is_empty() {
86 extras.push_str(&format!(
87 "\n <category>{}</category>",
88 xml_escape(tag)
89 ));
90 }
91 }
92 }
93
94 let item = format!(
95 r#" <item>
96 <title>{title}</title>
97 <link>{link}</link>
98 <description>{escaped_desc}</description>
99 <guid isPermaLink="true">{link}</guid>
100 <pubDate>{pub_date}</pubDate>
101 <author>{author}</author>{extras}
102 </item>"#
103 );
104
105 articles.push((sort_key, item));
106 }
107 articles
108}
109
110fn build_rss_channel(
112 channel_title: &str,
113 channel_link: &str,
114 channel_desc: &str,
115 base_url: &str,
116 language: &str,
117 last_build_date: &str,
118 copyright: &str,
119 items_xml: &str,
120) -> String {
121 let mut channel_extras = String::new();
122 if !language.is_empty() {
123 channel_extras
124 .push_str(&format!("\n <language>{language}</language>"));
125 }
126 if !last_build_date.is_empty() {
127 channel_extras.push_str(&format!(
128 "\n <lastBuildDate>{last_build_date}</lastBuildDate>"
129 ));
130 }
131 if !copyright.is_empty() {
132 channel_extras.push_str(&format!(
133 "\n <copyright>{}</copyright>",
134 xml_escape(copyright)
135 ));
136 }
137
138 format!(
139 r#"<?xml version="1.0" encoding="UTF-8"?>
140<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
141 <channel>
142 <title>{channel_title}</title>
143 <link>{channel_link}</link>
144 <description>{channel_desc}</description>
145 <atom:link href="{base_url}/rss.xml" rel="self" type="application/rss+xml"/>{channel_extras}
146{items_xml}
147 </channel>
148</rss>
149"#
150 )
151}
152
153impl Plugin for RssAggregatePlugin {
154 fn name(&self) -> &'static str {
155 "rss-aggregate"
156 }
157
158 fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
159 let rss_path = ctx.site_dir.join("rss.xml");
160 if !rss_path.exists() {
161 return Ok(());
162 }
163
164 let content = fs::read_to_string(&rss_path)
165 .with_context(|| format!("cannot read {}", rss_path.display()))?;
166
167 if content.matches("<item>").count() > 1 {
168 return Ok(());
169 }
170
171 let meta_entries =
172 read_meta_sidecars(&ctx.site_dir).unwrap_or_default();
173
174 let base_url = ctx
175 .config
176 .as_ref()
177 .map(|c| c.base_url.trim_end_matches('/').to_string())
178 .unwrap_or_default();
179
180 let language = extract_language(ctx);
181 let copyright = extract_copyright(&meta_entries);
182
183 let mut articles = collect_articles(&meta_entries, &base_url);
184 articles.sort_by(|a, b| b.0.cmp(&a.0));
185 articles.truncate(50);
186
187 if articles.is_empty() {
188 return Ok(());
189 }
190
191 let last_build_date = extract_last_build_date(&articles);
192
193 let items_xml: String = articles
194 .iter()
195 .map(|(_, xml)| xml.as_str())
196 .collect::<Vec<_>>()
197 .join("\n");
198
199 let channel_title = extract_xml_value(&content, "title")
200 .unwrap_or_else(|| "Untitled".to_string());
201 let channel_link = extract_xml_value(&content, "link")
202 .unwrap_or_else(|| base_url.clone());
203 let channel_desc =
204 extract_xml_value(&content, "description").unwrap_or_default();
205
206 let rebuilt = build_rss_channel(
207 &channel_title,
208 &channel_link,
209 &channel_desc,
210 &base_url,
211 &language,
212 &last_build_date,
213 ©right,
214 &items_xml,
215 );
216
217 fs::write(&rss_path, rebuilt)
218 .with_context(|| format!("cannot write {}", rss_path.display()))?;
219
220 log::info!(
221 "[rss-aggregate] Rebuilt rss.xml with {} article items",
222 articles.len()
223 );
224 Ok(())
225 }
226}
227
228fn extract_language(ctx: &PluginContext) -> String {
230 ctx.config
231 .as_ref()
232 .and_then(|c| {
233 if c.site_name.is_empty() {
234 None
235 } else {
236 Some("en".to_string())
237 }
238 })
239 .unwrap_or_else(|| "en".to_string())
240}
241
/// Returns the first usable `copyright` value found in the metadata entries.
///
/// Entries are scanned in slice order. Values that are empty or contain only
/// whitespace are skipped so that a blank field on an early page cannot hide
/// a real notice on a later one. The returned text is trimmed; an empty
/// string is returned when no entry provides a notice.
fn extract_copyright(
    meta_entries: &[(String, std::collections::HashMap<String, String>)],
) -> String {
    meta_entries
        .iter()
        .filter_map(|(_, meta)| meta.get("copyright"))
        .map(|notice| notice.trim())
        .find(|notice| !notice.is_empty())
        .unwrap_or_default()
        .to_string()
}
251
/// Returns the text of the first `<pubDate>` element of the first article.
///
/// `articles` holds `(sort_key, item_xml)` pairs; only the first pair is
/// inspected. An empty string is returned when the slice is empty or the
/// item XML has no complete `<pubDate>…</pubDate>` pair.
fn extract_last_build_date(articles: &[(String, String)]) -> String {
    /// Slices out the text between the first opening tag and the closing tag
    /// that follows it.
    fn pub_date_text(item_xml: &str) -> Option<&str> {
        let (_, after_open) = item_xml.split_once("<pubDate>")?;
        let (date, _) = after_open.split_once("</pubDate>")?;
        Some(date)
    }

    match articles.first() {
        Some((_, item_xml)) => {
            pub_date_text(item_xml).unwrap_or_default().to_string()
        }
        None => String::new(),
    }
}
264
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    // Tests for the RSS aggregation plugin: the end-to-end `after_compile`
    // hook against a temporary site directory, plus the pure helper functions.

    use super::*;
    use crate::plugin::PluginContext;
    use std::collections::HashMap;
    use std::path::Path;
    use tempfile::tempdir;

    /// Compact single-item feed used where only the rebuild path matters.
    const PLACEHOLDER_FEED: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>T</title><link>https://example.com</link><description>D</description><item><title>X</title></item></channel></rss>"#;

    /// Builds an owned metadata map from borrowed key/value pairs.
    fn meta_from(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|(key, value)| ((*key).to_string(), (*value).to_string()))
            .collect()
    }

    /// Writes `meta` as `<dir>/<slug>/page.meta.json`.
    fn write_meta_sidecar(
        dir: &Path,
        slug: &str,
        meta: &HashMap<String, String>,
    ) {
        let page_dir = dir.join(slug);
        fs::create_dir_all(&page_dir).expect("create page dir");
        let meta_path = page_dir.join("page.meta.json");
        let json = serde_json::to_string(meta).expect("serialize meta");
        fs::write(&meta_path, json).expect("write meta");
    }

    /// Creates a plugin context for `site_dir` with a site configuration
    /// whose base URL is `https://example.com`.
    fn ctx_with_config(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        let config = crate::cmd::SsgConfig {
            base_url: "https://example.com".to_string(),
            site_name: "Test Site".to_string(),
            site_title: "Test Site".to_string(),
            site_description: "A test site".to_string(),
            language: "en".to_string(),
            content_dir: std::path::PathBuf::from("content"),
            output_dir: std::path::PathBuf::from("build"),
            template_dir: std::path::PathBuf::from("templates"),
            serve_dir: None,
            i18n: None,
        };
        PluginContext::with_config(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
            config,
        )
    }

    /// Creates a plugin context for `site_dir` without a site configuration.
    fn test_ctx(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
        )
    }

    #[test]
    fn test_rss_aggregate_single_item_trigger() -> Result<()> {
        let tmp = tempdir()?;
        let rss_path = tmp.path().join("rss.xml");
        let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>My Site</title>
    <link>https://example.com</link>
    <description>A test site</description>
    <item>
      <title>Feed itself</title>
      <link>https://example.com/rss.xml</link>
    </item>
  </channel>
</rss>"#;
        fs::write(&rss_path, original)?;

        let ctx = test_ctx(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        // No sidecars exist, so there is nothing to rebuild the feed from.
        let result = fs::read_to_string(&rss_path)?;
        assert_eq!(
            result, original,
            "feed should be left untouched when no articles are found"
        );
        Ok(())
    }

    #[test]
    fn test_rss_aggregate_with_full_metadata() -> Result<()> {
        let tmp = tempdir()?;

        let rss_path = tmp.path().join("rss.xml");
        fs::write(
            &rss_path,
            r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Test Blog</title>
    <link>https://example.com</link>
    <description>A test blog</description>
    <item>
      <title>Placeholder</title>
    </item>
  </channel>
</rss>"#,
        )?;

        let meta = meta_from(&[
            ("title", "Article One"),
            ("description", "First article desc"),
            ("item_pub_date", "Thu, 11 Apr 2026 06:06:06 +0000"),
            ("author", "Alice"),
            ("banner", "/images/banner.webp"),
            ("category", "Technology"),
            ("tags", "rust, web"),
            ("copyright", "Copyright 2026 Alice"),
        ]);
        write_meta_sidecar(tmp.path(), "article-one", &meta);

        let ctx = ctx_with_config(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&rss_path)?;

        assert!(
            result.contains(
                "<enclosure url=\"https://example.com/images/banner.webp\""
            ),
            "Should have enclosure with base_url prefix: {result}"
        );
        assert!(
            result.contains("type=\"image/webp\""),
            "Should detect webp MIME type: {result}"
        );
        assert!(
            result.contains("<category>Technology</category>"),
            "Should have category element: {result}"
        );
        assert!(
            result.contains("<category>rust</category>"),
            "Should have tag category 'rust': {result}"
        );
        assert!(
            result.contains("<category>web</category>"),
            "Should have tag category 'web': {result}"
        );
        assert!(
            result.contains("<language>en</language>"),
            "Should have language element: {result}"
        );
        assert!(
            result.contains("<lastBuildDate>"),
            "Should have lastBuildDate: {result}"
        );
        assert!(
            result.contains("<copyright>Copyright 2026 Alice</copyright>"),
            "Should have copyright: {result}"
        );

        Ok(())
    }

    #[test]
    fn test_rss_aggregate_banner_with_image_field() -> Result<()> {
        let tmp = tempdir()?;

        let rss_path = tmp.path().join("rss.xml");
        fs::write(&rss_path, PLACEHOLDER_FEED)?;

        let meta = meta_from(&[
            ("title", "Image Test"),
            ("description", "Testing image"),
            ("item_pub_date", "Mon, 01 Sep 2025 12:00:00 +0000"),
            ("author", "Bob"),
            ("image", "https://cdn.example.com/photo.png"),
        ]);
        write_meta_sidecar(tmp.path(), "img-test", &meta);

        let ctx = ctx_with_config(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&rss_path)?;
        assert!(
            result.contains("url=\"https://cdn.example.com/photo.png\""),
            "Should use absolute image URL as-is: {result}"
        );
        assert!(
            result.contains("type=\"image/png\""),
            "Should detect png MIME type: {result}"
        );
        Ok(())
    }

    #[test]
    fn test_rss_aggregate_jpeg_mime() -> Result<()> {
        let tmp = tempdir()?;

        let rss_path = tmp.path().join("rss.xml");
        fs::write(&rss_path, PLACEHOLDER_FEED)?;

        let meta = meta_from(&[
            ("title", "JPEG Test"),
            ("description", "desc"),
            ("item_pub_date", "Mon, 01 Sep 2025 12:00:00 +0000"),
            ("author", "Carol"),
            ("banner", "/img/photo.jpg"),
        ]);
        write_meta_sidecar(tmp.path(), "jpeg-test", &meta);

        let ctx = ctx_with_config(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&rss_path)?;
        assert!(
            result.contains("type=\"image/jpeg\""),
            "Should default to image/jpeg for .jpg: {result}"
        );
        Ok(())
    }

    #[test]
    fn test_rss_aggregate_skips_multi_item() -> Result<()> {
        let tmp = tempdir()?;

        let rss_path = tmp.path().join("rss.xml");
        let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>T</title><link>x</link><description>D</description>
<item><title>A</title></item>
<item><title>B</title></item>
</channel></rss>"#;
        fs::write(&rss_path, original)?;

        let ctx = test_ctx(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;

        let result = fs::read_to_string(&rss_path)?;
        assert_eq!(result, original, "Should not modify feed with >1 items");
        Ok(())
    }

    #[test]
    fn test_collect_articles_empty_entries() {
        let articles = collect_articles(&[], "https://example.com");
        assert!(
            articles.is_empty(),
            "no meta entries should produce no articles"
        );
    }

    #[test]
    fn test_collect_articles_skips_empty_title() {
        let entries = vec![(
            "page".to_string(),
            meta_from(&[("description", "no title here")]),
        )];
        let articles = collect_articles(&entries, "https://example.com");
        assert!(
            articles.is_empty(),
            "entries without title should be skipped"
        );
    }

    #[test]
    fn test_collect_articles_skips_empty_path() {
        let entries = vec![(String::new(), meta_from(&[("title", "Has Title")]))];
        let articles = collect_articles(&entries, "https://example.com");
        assert!(
            articles.is_empty(),
            "entries with empty path should be skipped"
        );
    }

    #[test]
    fn test_collect_articles_multiple_entries_sorted() {
        let older = meta_from(&[
            ("title", "Older"),
            ("description", "old"),
            ("item_pub_date", "Mon, 01 Jan 2024 00:00:00 +0000"),
            ("author", "A"),
        ]);
        let newer = meta_from(&[
            ("title", "Newer"),
            ("description", "new"),
            ("item_pub_date", "Wed, 01 Jan 2025 00:00:00 +0000"),
            ("author", "B"),
        ]);

        let entries = vec![
            ("old-post".to_string(), older),
            ("new-post".to_string(), newer),
        ];
        let mut articles = collect_articles(&entries, "https://example.com");
        assert_eq!(articles.len(), 2);

        // Same ordering the plugin applies: descending by sort key.
        articles.sort_by(|a, b| b.0.cmp(&a.0));
        assert!(
            articles[0].1.contains("<title>Newer</title>"),
            "newest article should sort first"
        );
    }

    #[test]
    fn test_collect_articles_xml_escapes_description() {
        let entries = vec![(
            "esc".to_string(),
            meta_from(&[
                ("title", "Escape Test"),
                ("description", "Use <b>bold</b> & \"quotes\""),
                ("author", "X"),
            ]),
        )];
        let articles = collect_articles(&entries, "");
        assert_eq!(articles.len(), 1);
        let xml = &articles[0].1;
        assert!(
            xml.contains("&lt;b&gt;bold&lt;/b&gt;"),
            "angle brackets should be escaped: {xml}"
        );
        assert!(
            !xml.contains("<b>bold</b>"),
            "raw markup must not leak into the item: {xml}"
        );
        assert!(xml.contains("&amp;"), "ampersands should be escaped: {xml}");
    }

    #[test]
    fn test_build_rss_channel_minimal() {
        let result = build_rss_channel(
            "Title",
            "https://x.example",
            "Desc",
            "https://x.example",
            "",
            "",
            "",
            "",
        );
        assert!(result.contains("<title>Title</title>"));
        assert!(result.contains("<link>https://x.example</link>"));
        assert!(result.contains("<description>Desc</description>"));
        assert!(
            !result.contains("<language>"),
            "no language when empty string supplied"
        );
        assert!(
            !result.contains("<lastBuildDate>"),
            "no lastBuildDate when empty string supplied"
        );
    }

    #[test]
    fn test_build_rss_channel_with_all_extras() {
        let result = build_rss_channel(
            "T",
            "L",
            "D",
            "https://x.example",
            "en",
            "Mon, 01 Jan 2024 00:00:00 +0000",
            "Copyright 2024 X",
            "<item><title>A</title></item>",
        );
        assert!(result.contains("<language>en</language>"));
        assert!(result.contains(
            "<lastBuildDate>Mon, 01 Jan 2024 00:00:00 +0000</lastBuildDate>"
        ));
        assert!(result.contains("<copyright>Copyright 2024 X</copyright>"));
        assert!(result.contains("<item><title>A</title></item>"));
    }

    #[test]
    fn test_extract_last_build_date_from_articles() {
        let articles = vec![
            (
                "2025".to_string(),
                "<item><pubDate>Mon, 01 Sep 2025 12:00:00 +0000</pubDate></item>"
                    .to_string(),
            ),
            (
                "2024".to_string(),
                "<item><pubDate>Mon, 01 Jan 2024 00:00:00 +0000</pubDate></item>"
                    .to_string(),
            ),
        ];
        let date = extract_last_build_date(&articles);
        assert_eq!(date, "Mon, 01 Sep 2025 12:00:00 +0000");
    }

    #[test]
    fn test_extract_last_build_date_empty() {
        let articles: Vec<(String, String)> = vec![];
        let date = extract_last_build_date(&articles);
        assert!(date.is_empty());
    }

    #[test]
    fn test_rss_no_file_is_noop() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        RssAggregatePlugin.after_compile(&ctx)?;
        assert!(!tmp.path().join("rss.xml").exists());
        Ok(())
    }
}