1mod canonical;
14pub mod helpers;
15mod jsonld;
16mod robots;
17mod seo_plugin;
18
19pub use canonical::CanonicalPlugin;
20pub use jsonld::{
21 validate_jsonld, JsonLdConfig, JsonLdPlugin, JsonLdValidationError,
22};
23pub use robots::RobotsPlugin;
24pub use seo_plugin::SeoPlugin;
25
// Unit tests for the SEO helper functions and for the SEO, robots,
// canonical and JSON-LD plugins re-exported by this module.
//
// `unwrap`/`expect` are allowed below because a panic is exactly how a test
// reports failure.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::helpers::*;
    use super::*;
    use crate::plugin::{Plugin, PluginContext};
    use anyhow::Result;
    use std::fs;
    use std::path::Path;
    use tempfile::tempdir;

    /// Builds a minimal HTML document with the given `<title>` text and
    /// `<body>` markup.
    fn make_html(title: &str, body: &str) -> String {
        format!(
            "<html><head><title>{title}</title></head>\
             <body>{body}</body></html>"
        )
    }

    /// Initialises the shared test logger and returns a `PluginContext`
    /// built from fixed `content`, `build` and `templates` paths plus the
    /// supplied `site_dir`.
    fn test_ctx(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
        )
    }

    // -----------------------------------------------------------------
    // Helper functions: extract_title / extract_description
    // -----------------------------------------------------------------

    #[test]
    fn test_extract_title_present() {
        let html = "<html><head><title>My Page</title></head></html>";
        assert_eq!(extract_title(html), "My Page");
    }

    #[test]
    fn test_extract_title_missing() {
        let html = "<html><head></head><body></body></html>";
        assert_eq!(extract_title(html), "");
    }

    /// A 500-byte paragraph must be cut down to at most the requested length.
    #[test]
    fn test_extract_description_truncates() {
        let long = "word ".repeat(100);
        let html =
            format!("<html><head></head><body><p>{long}</p></body></html>");
        let desc = extract_description(&html, 160);
        assert!(desc.len() <= 160);
        assert!(!desc.is_empty());
    }

    // -----------------------------------------------------------------
    // SeoPlugin
    // -----------------------------------------------------------------

    #[test]
    fn test_seo_plugin_name() {
        assert_eq!(SeoPlugin.name(), "seo");
    }

    /// A page with a title and body text gets description, Open Graph and
    /// Twitter card meta tags.
    #[test]
    fn test_seo_plugin_injects_meta_tags() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let html = make_html("Hello World", "<p>Some content here</p>");

        let result =
            SeoPlugin.transform_html(&html, Path::new("index.html"), &ctx)?;
        assert!(result.contains("<meta name=\"description\""));
        assert!(result.contains("<meta property=\"og:title\""));
        assert!(result.contains("Hello World"));
        assert!(result.contains("<meta property=\"og:description\""));
        assert!(
            result.contains("<meta property=\"og:type\" content=\"website\"")
        );
        assert!(
            result.contains("<meta name=\"twitter:card\" content=\"summary\"")
        );
        Ok(())
    }

    /// Running the transform on its own output must not change it.
    #[test]
    fn test_seo_plugin_idempotent() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let html = make_html("Test", "<p>Content</p>");

        let first =
            SeoPlugin.transform_html(&html, Path::new("page.html"), &ctx)?;
        let second =
            SeoPlugin.transform_html(&first, Path::new("page.html"), &ctx)?;

        assert_eq!(first, second);
        Ok(())
    }

    /// Text from the skip link, `<header>` and `<footer>` must not leak into
    /// the description when a `<main>` element is present.
    #[test]
    fn test_extract_description_excludes_nav_header_footer() {
        let html = r##"<html><head></head><body>
            <a href="#main">Skip to content</a>
            <nav><ul><li>Home</li><li>About</li><li>Search</li></ul></nav>
            <header><h1>Site Header</h1></header>
            <main><p>This is the actual page content that should be extracted.</p></main>
            <footer><p>Copyright 2026</p></footer>
            </body></html>"##;
        let desc = extract_description(html, 160);
        assert!(
            desc.contains("actual page content"),
            "description should contain main content, got: {desc}"
        );
        assert!(
            !desc.contains("Skip to content"),
            "description should not contain skip link text"
        );
        assert!(
            !desc.contains("Site Header"),
            "description should not contain header text"
        );
        assert!(
            !desc.contains("Copyright"),
            "description should not contain footer text"
        );
    }

    /// Without a `<title>`, type and card tags are still emitted but no
    /// `og:title` tag is.
    #[test]
    fn test_seo_plugin_handles_missing_title() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let html =
            "<html><head></head><body><p>No title here</p></body></html>";

        let result =
            SeoPlugin.transform_html(html, Path::new("no-title.html"), &ctx)?;
        assert!(result.contains("<meta property=\"og:type\""));
        assert!(result.contains("<meta name=\"twitter:card\""));
        assert!(!result.contains("<meta property=\"og:title\""));
        Ok(())
    }

    #[test]
    fn test_seo_plugin_empty_dir() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        assert!(SeoPlugin.after_compile(&ctx).is_ok());
        Ok(())
    }

    #[test]
    fn test_seo_plugin_nonexistent_dir() -> Result<()> {
        let ctx = test_ctx(Path::new("/nonexistent/path"));
        assert!(SeoPlugin.after_compile(&ctx).is_ok());
        Ok(())
    }

    // -----------------------------------------------------------------
    // RobotsPlugin
    // -----------------------------------------------------------------

    #[test]
    fn test_robots_plugin_name() {
        let plugin = RobotsPlugin::new("https://example.com");
        assert_eq!(plugin.name(), "robots");
    }

    #[test]
    fn test_robots_plugin_creates_file() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let plugin = RobotsPlugin::new("https://example.com");
        plugin.after_compile(&ctx)?;

        let path = tmp.path().join("robots.txt");
        assert!(path.exists());
        Ok(())
    }

    #[test]
    fn test_robots_plugin_correct_content() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let plugin = RobotsPlugin::new("https://example.com");
        plugin.after_compile(&ctx)?;

        let content = fs::read_to_string(tmp.path().join("robots.txt"))?;
        assert!(content.contains("User-agent: *"));
        assert!(content.contains("Allow: /"));
        assert!(content.contains("Sitemap: https://example.com/sitemap.xml"));
        Ok(())
    }

    /// A pre-existing `robots.txt` must be left exactly as the user wrote it.
    #[test]
    fn test_robots_plugin_does_not_overwrite() -> Result<()> {
        let tmp = tempdir()?;
        let robots_path = tmp.path().join("robots.txt");
        fs::write(&robots_path, "User-agent: *\nDisallow: /secret\n")?;

        let ctx = test_ctx(tmp.path());
        let plugin = RobotsPlugin::new("https://example.com");
        plugin.after_compile(&ctx)?;

        let content = fs::read_to_string(&robots_path)?;
        assert!(content.contains("Disallow: /secret"));
        assert!(!content.contains("Sitemap:"));
        Ok(())
    }

    #[test]
    fn test_robots_plugin_custom_base_url() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let plugin = RobotsPlugin::new("https://my-site.org");
        plugin.after_compile(&ctx)?;

        let content = fs::read_to_string(tmp.path().join("robots.txt"))?;
        assert!(content.contains("Sitemap: https://my-site.org/sitemap.xml"));
        Ok(())
    }

    // -----------------------------------------------------------------
    // CanonicalPlugin
    // -----------------------------------------------------------------

    #[test]
    fn test_canonical_plugin_name() {
        let plugin = CanonicalPlugin::new("https://example.com");
        assert_eq!(plugin.name(), "canonical");
    }

    #[test]
    fn test_canonical_plugin_injects_tag() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let plugin = CanonicalPlugin::new("https://example.com");
        let html = make_html("Home", "<p>Welcome</p>");
        let page_path = tmp.path().join("index.html");

        let result = plugin.transform_html(&html, &page_path, &ctx)?;
        assert!(result.contains("<link rel=\"canonical\""));
        assert!(result.contains("https://example.com/index.html"));
        Ok(())
    }

    #[test]
    fn test_canonical_plugin_idempotent() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let plugin = CanonicalPlugin::new("https://example.com");
        let html = make_html("Page", "<p>Content</p>");
        let page_path = tmp.path().join("page.html");

        let first = plugin.transform_html(&html, &page_path, &ctx)?;
        let second = plugin.transform_html(&first, &page_path, &ctx)?;

        assert_eq!(first, second);
        Ok(())
    }

    /// The canonical URL keeps the page's path relative to the site dir.
    #[test]
    fn test_canonical_plugin_nested_files() -> Result<()> {
        let tmp = tempdir()?;
        fs::create_dir_all(tmp.path().join("blog"))?;
        let ctx = test_ctx(tmp.path());
        let plugin = CanonicalPlugin::new("https://example.com");
        let html = make_html("Post", "<p>Blog post</p>");
        let page_path = tmp.path().join("blog/post.html");

        let result = plugin.transform_html(&html, &page_path, &ctx)?;
        assert!(result.contains("https://example.com/blog/post.html"));
        Ok(())
    }

    // -----------------------------------------------------------------
    // Registration
    // -----------------------------------------------------------------

    /// Plugins are reported by the manager in registration order.
    #[test]
    fn test_all_plugins_register() {
        use crate::plugin::PluginManager;
        let mut pm = PluginManager::new();
        pm.register(SeoPlugin);
        pm.register(RobotsPlugin::new("https://example.com"));
        pm.register(CanonicalPlugin::new("https://example.com"));
        assert_eq!(pm.len(), 3);
        assert_eq!(pm.names(), vec!["seo", "robots", "canonical"]);
    }

    // -----------------------------------------------------------------
    // Edge cases
    // -----------------------------------------------------------------

    /// Truncating multi-byte text must neither panic nor exceed the limit.
    #[test]
    fn extract_description_unicode_truncation_respects_char_boundary() {
        let text = "café 日本語 ".repeat(30);
        let html =
            format!("<html><head></head><body><p>{text}</p></body></html>");

        let desc = extract_description(&html, 50);

        assert!(desc.len() <= 50);
        assert!(!desc.is_empty());
    }

    // NOTE(review): despite the "falls_back_to_body" name, the assertion
    // below pins an *empty* description for an empty `<main>`; the body
    // paragraph is not expected to be used. Consider renaming the test.
    #[test]
    fn extract_description_empty_main_falls_back_to_body() {
        let html = "<html><head></head><body>\
                    <main></main>\
                    <p>Body fallback text</p>\
                    </body></html>";

        let desc = extract_description(html, 160);

        assert!(
            desc.is_empty(),
            "expected empty description from empty <main>, got: {desc}"
        );
    }

    #[test]
    fn extract_description_no_body_uses_raw_html() {
        let html = "<div><p>Raw content without body</p></div>";

        let desc = extract_description(html, 160);

        assert!(
            desc.contains("Raw content without body"),
            "expected raw content fallback, got: {desc}"
        );
    }

    #[test]
    fn extract_title_with_nested_tags() {
        let html = "<html><head><title><span>Foo</span></title></head></html>";

        let title = extract_title(html);

        assert_eq!(title, "Foo");
    }

    /// `&`, `"`, `<` and `>` must each be replaced by their HTML entity.
    #[test]
    fn escape_attr_all_special_chars() {
        let input = r#"Tom & "Jerry" <script>alert('xss')</script>"#;

        let escaped = escape_attr(input);

        assert!(escaped.contains("&amp;"), "& should be escaped");
        assert!(escaped.contains("&quot;"), "\" should be escaped");
        assert!(escaped.contains("&lt;"), "< should be escaped");
        assert!(escaped.contains("&gt;"), "> should be escaped");
        // NOTE(review): the apostrophes are assumed to pass through
        // unescaped — confirm against `escape_attr` in `helpers`.
        assert_eq!(
            escaped,
            "Tom &amp; &quot;Jerry&quot; &lt;script&gt;alert('xss')&lt;/script&gt;"
        );
    }

    /// Meta tags written with single-quoted attributes count as present and
    /// must not be injected a second time.
    #[test]
    fn seo_plugin_skips_existing_single_quote_meta() -> Result<()> {
        let html = "<html><head>\
                    <meta name='description' content='Already set'>\
                    <meta property='og:title' content='Title'>\
                    <meta property='og:description' content='Desc'>\
                    <meta property='og:type' content='website'>\
                    <meta name='twitter:card' content='summary'>\
                    <title>Test</title></head>\
                    <body><p>Content</p></body></html>";
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());

        let result = SeoPlugin.transform_html(
            html,
            Path::new("single-quote.html"),
            &ctx,
        )?;
        assert_eq!(
            result.matches("meta name=\"description\"").count()
                + result.matches("meta name='description'").count(),
            1,
            "description meta should not be duplicated"
        );
        assert_eq!(
            result.matches("og:title").count(),
            1,
            "og:title should not be duplicated"
        );
        Ok(())
    }

    #[test]
    fn canonical_plugin_trailing_slash_base_url() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let plugin = CanonicalPlugin::new("https://example.com/");
        let html = make_html("Home", "<p>Welcome</p>");
        let page_path = tmp.path().join("index.html");

        let result = plugin.transform_html(&html, &page_path, &ctx)?;
        assert!(
            result.contains("https://example.com/index.html"),
            "should produce clean URL without double slash"
        );
        assert!(
            !result.contains("https://example.com//"),
            "should not contain double slash in canonical URL"
        );
        Ok(())
    }

    #[test]
    fn robots_plugin_trailing_slash_base_url() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());
        let plugin = RobotsPlugin::new("https://example.com/");

        plugin.after_compile(&ctx)?;

        let content = fs::read_to_string(tmp.path().join("robots.txt"))?;
        assert!(
            content.contains("Sitemap: https://example.com/sitemap.xml"),
            "sitemap URL should not have double slash, got: {content}"
        );
        assert!(
            !content.contains("https://example.com//"),
            "should not contain double slash"
        );
        Ok(())
    }

    #[test]
    fn extract_description_nested_script_in_main() {
        let html = "<html><head></head><body>\
                    <main>\
                    <script>var x = 'ignore me';</script>\
                    <p>Visible text after script</p>\
                    </main></body></html>";

        let desc = extract_description(html, 160);

        assert!(
            desc.contains("Visible text after script"),
            "should contain the paragraph text, got: {desc}"
        );
        assert!(
            !desc.contains("ignore me"),
            "should not contain script content, got: {desc}"
        );
    }

    // -----------------------------------------------------------------
    // JsonLdPlugin
    // -----------------------------------------------------------------

    #[test]
    fn test_jsonld_injects_webpage() {
        let dir = tempdir().unwrap();
        let site = dir.path().join("site");
        fs::create_dir_all(&site).unwrap();

        let html = make_html("About", "<p>About us</p>");
        let ctx = test_ctx(&site);
        let plugin = JsonLdPlugin::from_site("https://example.com", "Test Org");
        let page_path = site.join("about.html");

        let output = plugin.transform_html(&html, &page_path, &ctx).unwrap();
        assert!(output.contains("application/ld+json"));
        assert!(output.contains("\"@type\":\"WebPage\""));
        assert!(output.contains("\"name\":\"About\""));
    }

    /// A page containing an `<article>` element is described as an `Article`.
    #[test]
    fn test_jsonld_injects_article() {
        let dir = tempdir().unwrap();
        let site = dir.path().join("site");
        fs::create_dir_all(&site).unwrap();

        let html = "<html><head><title>Post</title></head>\
                    <body><article><h1>Post</h1></article></body></html>";
        let ctx = test_ctx(&site);
        let plugin = JsonLdPlugin::from_site("https://example.com", "My Org");
        let page_path = site.join("post.html");

        let output = plugin.transform_html(html, &page_path, &ctx).unwrap();
        assert!(output.contains("\"@type\":\"Article\""));
        assert!(output.contains("\"headline\":\"Post\""));
        assert!(output.contains("My Org"));
    }

    #[test]
    fn test_jsonld_breadcrumbs() {
        let dir = tempdir().unwrap();
        let site = dir.path().join("site");
        let blog = site.join("blog");
        fs::create_dir_all(&blog).unwrap();

        let html = make_html("My Post", "<p>Content</p>");
        let ctx = test_ctx(&site);
        let plugin = JsonLdPlugin::from_site("https://example.com", "Org");
        let page_path = blog.join("my-post.html");

        let output = plugin.transform_html(&html, &page_path, &ctx).unwrap();
        assert!(output.contains("BreadcrumbList"));
        assert!(output.contains("\"name\":\"Home\""));
        assert!(output.contains("\"name\":\"blog\""));
    }

    /// A page that already carries a JSON-LD script must not get a second one.
    #[test]
    fn test_jsonld_idempotent() {
        let dir = tempdir().unwrap();
        let site = dir.path().join("site");
        fs::create_dir_all(&site).unwrap();

        let html = "<html><head><title>X</title>\
                    <script type=\"application/ld+json\">{}</script>\
                    </head><body></body></html>";
        let ctx = test_ctx(&site);
        let plugin = JsonLdPlugin::from_site("https://example.com", "Org");
        let page_path = site.join("x.html");

        let output = plugin.transform_html(html, &page_path, &ctx).unwrap();
        let count = output.matches("application/ld+json").count();
        assert_eq!(count, 1);
    }

    // -----------------------------------------------------------------
    // extract_title: additional cases
    // -----------------------------------------------------------------

    #[test]
    fn extract_title_empty_tag_returns_empty_string() {
        assert_eq!(extract_title("<title></title>"), "");
        assert_eq!(extract_title("<title> </title>"), "");
        assert_eq!(extract_title("<title>\n\t </title>"), "");
    }

    #[test]
    fn extract_title_without_closing_tag_returns_empty() {
        assert_eq!(extract_title("<title>Unterminated"), "");
    }

    #[test]
    fn extract_title_strips_inner_html_tags() {
        let out = extract_title("<title>Hello <em>World</em></title>");
        assert!(out.contains("Hello"));
        assert!(out.contains("World"));
    }

    // -----------------------------------------------------------------
    // extract_description: additional cases
    // -----------------------------------------------------------------

    #[test]
    fn extract_description_prefers_main_over_body() {
        let html = r"<html><head></head><body>
            <nav>menu</nav>
            <main>The primary content.</main>
            <footer>Bottom</footer>
            </body></html>";
        let desc = extract_description(html, 200);
        assert!(desc.contains("primary content"));
        assert!(!desc.contains("menu"));
    }

    #[test]
    fn extract_description_main_without_closing_tag_takes_rest() {
        let html = r"<html><body><main>content without close";
        let desc = extract_description(html, 200);
        assert!(desc.contains("content without close"));
    }

    #[test]
    fn extract_description_main_without_angle_bracket_returns_empty_fallback() {
        let html = "<html><body><main";
        let desc = extract_description(html, 200);
        assert_eq!(desc, "");
    }

    #[test]
    fn extract_description_fallback_to_body_strips_script_and_style() {
        let html = r"<html><head></head><body>
            <script>alert('skip');</script>
            <style>body { color: red; }</style>
            <nav>menu items here</nav>
            <header>site title</header>
            <p>The body text.</p>
            <footer>copyright</footer>
            </body></html>";
        let desc = extract_description(html, 200);
        assert!(desc.contains("body text"));
        assert!(!desc.contains("alert"));
        assert!(!desc.contains("color: red"));
        assert!(!desc.contains("menu items"));
        assert!(!desc.contains("site title"));
        assert!(!desc.contains("copyright"));
    }

    #[test]
    fn extract_description_body_without_closing_tag_uses_rest() {
        let html = "<html><body><p>open-ended body paragraph";
        let desc = extract_description(html, 200);
        assert!(desc.contains("open-ended body paragraph"));
    }

    #[test]
    fn extract_description_body_without_angle_bracket_returns_empty() {
        let html = "<html><body";
        let desc = extract_description(html, 200);
        assert_eq!(desc, "");
    }

    #[test]
    fn extract_description_no_body_no_main_uses_entire_html() {
        let html = "just plain text no tags here";
        let desc = extract_description(html, 200);
        assert!(desc.contains("just plain text"));
    }

    /// Smoke test: an unterminated `<script>` must not panic. The returned
    /// value is deliberately not inspected.
    #[test]
    fn extract_description_unterminated_script_breaks_out() {
        let html = "<html><body><main><script>unterminated<p>x</p>";
        let desc = extract_description(html, 200);
        let _ = desc;
    }

    #[test]
    fn extract_description_truncates_at_word_boundary() {
        let html = "<html><body><main>one two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen twenty twenty-one twenty-two twenty-three twenty-four twenty-five</main></body></html>";
        let desc = extract_description(html, 80);
        assert!(desc.len() <= 80);
        assert!(!desc.ends_with('-'));
    }

    #[test]
    fn extract_description_truncates_without_space_falls_to_byte_cut() {
        let html =
            "<html><body><main>oneverylongwordwithnospacesanywherehere</main></body></html>";
        let desc = extract_description(html, 10);
        assert!(desc.len() <= 10);
    }

    /// Truncating text that contains multi-byte characters must not panic
    /// and must still yield a non-empty result within the limit.
    #[test]
    fn extract_description_respects_char_boundary_on_truncation() {
        let html = "<html><body><main>Rust programming — é ñ ü characters everywhere in this text that we want to truncate mid-char</main></body></html>";
        let desc = extract_description(html, 30);
        // The previous `is_ascii() || !is_empty()` check could never fail
        // (an empty string is ASCII), so assert the real contract instead.
        assert!(!desc.is_empty(), "expected non-empty desc");
        assert!(desc.len() <= 30);
    }

    /// The byte limit (22) lands inside the 4-byte emoji that starts at byte
    /// 20, so truncation has to back up more than one byte.
    #[test]
    fn extract_description_truncation_walks_back_multiple_bytes() {
        let mut input = String::from("<html><body><main>");
        input.push_str(&"a".repeat(20));
        input.push('🎉');
        input.push_str(&"b".repeat(20));
        input.push_str("</main></body></html>");
        let desc = extract_description(&input, 22);
        assert!(!desc.is_empty(), "expected non-empty desc");
        assert!(desc.len() <= 22);
    }

    /// Smoke test: an unterminated `<nav>` in the body fallback must not
    /// panic. The returned value is deliberately not inspected.
    #[test]
    fn extract_description_body_fallback_unterminated_nav_breaks() {
        let html = "<html><body><nav>unterminated nav block<p>visible</p>";
        let desc = extract_description(html, 200);
        let _ = desc;
    }

    // -----------------------------------------------------------------
    // SeoPlugin: after_compile
    // -----------------------------------------------------------------

    #[test]
    fn seo_plugin_file_without_head_tag_is_unchanged() {
        let dir = tempdir().unwrap();
        fs::write(
            dir.path().join("fragment.html"),
            "<p>no html/head/body structure</p>",
        )
        .unwrap();
        let ctx = test_ctx(dir.path());
        SeoPlugin.after_compile(&ctx).unwrap();
        let out = fs::read_to_string(dir.path().join("fragment.html")).unwrap();
        assert_eq!(out, "<p>no html/head/body structure</p>");
    }

    #[test]
    fn seo_plugin_missing_site_dir_returns_ok() {
        let dir = tempdir().unwrap();
        let missing = dir.path().join("missing");
        let ctx = test_ctx(&missing);
        SeoPlugin.after_compile(&ctx).unwrap();
    }

    // -----------------------------------------------------------------
    // RobotsPlugin: additional cases
    // -----------------------------------------------------------------

    #[test]
    fn robots_plugin_skips_existing_robots_txt() {
        let dir = tempdir().unwrap();
        let existing = dir.path().join("robots.txt");
        fs::write(&existing, "USER: existing").unwrap();

        let plugin = RobotsPlugin::new("https://example.com");
        let ctx = test_ctx(dir.path());
        plugin.after_compile(&ctx).unwrap();

        assert_eq!(fs::read_to_string(&existing).unwrap(), "USER: existing");
    }

    #[test]
    fn robots_plugin_writes_user_agent_and_sitemap() {
        let dir = tempdir().unwrap();
        let plugin = RobotsPlugin::new("https://example.com/");
        let ctx = test_ctx(dir.path());
        plugin.after_compile(&ctx).unwrap();

        let body = fs::read_to_string(dir.path().join("robots.txt")).unwrap();
        assert!(body.contains("User-agent: *"));
        assert!(body.contains("Sitemap: https://example.com/sitemap.xml"));
    }

    #[test]
    fn robots_plugin_missing_site_dir_returns_ok() {
        let dir = tempdir().unwrap();
        let missing = dir.path().join("missing");
        let plugin = RobotsPlugin::new("https://example.com");
        let ctx = test_ctx(&missing);
        plugin.after_compile(&ctx).unwrap();
    }

    #[test]
    fn robots_plugin_name_returns_static_identifier() {
        assert_eq!(RobotsPlugin::new("").name(), "robots");
    }

    // -----------------------------------------------------------------
    // CanonicalPlugin: additional cases
    // -----------------------------------------------------------------

    #[test]
    fn canonical_plugin_missing_site_dir_returns_ok() {
        let dir = tempdir().unwrap();
        let missing = dir.path().join("missing");
        let plugin = CanonicalPlugin::new("https://example.com");
        let ctx = test_ctx(&missing);
        plugin.after_compile(&ctx).unwrap();
    }

    #[test]
    fn canonical_plugin_replaces_existing_canonical_with_correct_url() {
        let dir = tempdir().unwrap();
        let html = r#"<html><head><link rel="canonical" href="/original"></head><body></body></html>"#;
        let plugin = CanonicalPlugin::new("https://example.com");
        let ctx = test_ctx(dir.path());
        let page_path = dir.path().join("p.html");

        let out = plugin.transform_html(html, &page_path, &ctx).unwrap();
        assert_eq!(out.matches(r#"rel="canonical""#).count(), 1);
        assert!(out.contains("https://example.com/p.html"));
    }

    /// Only checks that the word "canonical" occurs exactly once in the
    /// output for a page whose existing link uses single quotes.
    #[test]
    fn canonical_plugin_skips_pages_with_single_quoted_canonical() {
        let dir = tempdir().unwrap();
        let html =
            r"<html><head><link rel='canonical' href='/x'></head></html>";
        let plugin = CanonicalPlugin::new("https://example.com");
        let ctx = test_ctx(dir.path());
        let page_path = dir.path().join("p.html");

        let out = plugin.transform_html(html, &page_path, &ctx).unwrap();
        assert_eq!(out.matches("canonical").count(), 1);
    }

    #[test]
    fn canonical_plugin_page_without_head_is_left_unchanged() {
        let dir = tempdir().unwrap();
        let html = "<p>no structure</p>";
        let plugin = CanonicalPlugin::new("https://example.com");
        let ctx = test_ctx(dir.path());
        let page_path = dir.path().join("frag.html");

        let out = plugin.transform_html(html, &page_path, &ctx).unwrap();
        assert_eq!(out, html);
    }

    #[test]
    fn canonical_plugin_injects_canonical_link_before_head_close() {
        let dir = tempdir().unwrap();
        let html = "<html><head><title>T</title></head><body></body></html>";
        let plugin = CanonicalPlugin::new("https://example.com/");
        let ctx = test_ctx(dir.path());
        let page_path = dir.path().join("a.html");

        let out = plugin.transform_html(html, &page_path, &ctx).unwrap();
        assert!(out.contains(r#"rel="canonical""#));
        assert!(out.contains("https://example.com/a.html"));
    }

    #[test]
    fn canonical_plugin_name_returns_static_identifier() {
        assert_eq!(CanonicalPlugin::new("").name(), "canonical");
    }

    // -----------------------------------------------------------------
    // JsonLdPlugin: additional cases
    // -----------------------------------------------------------------

    #[test]
    fn jsonld_plugin_missing_site_dir_returns_ok() {
        let dir = tempdir().unwrap();
        let missing = dir.path().join("missing");
        let plugin = JsonLdPlugin::from_site("https://example.com", "Org");
        let ctx = test_ctx(&missing);
        plugin.after_compile(&ctx).unwrap();
    }

    #[test]
    fn jsonld_plugin_skips_pages_without_head_tag() {
        let dir = tempdir().unwrap();
        let site = dir.path().join("site");
        fs::create_dir_all(&site).unwrap();
        let ctx = test_ctx(&site);
        let plugin = JsonLdPlugin::from_site("https://example.com", "Org");
        let page_path = site.join("frag.html");

        let out = plugin
            .transform_html("<p>no head</p>", &page_path, &ctx)
            .unwrap();
        assert_eq!(out, "<p>no head</p>");
    }

    #[test]
    fn jsonld_plugin_generates_webpage_when_no_article_element() {
        let dir = tempdir().unwrap();
        let site = dir.path().join("site");
        fs::create_dir_all(&site).unwrap();
        let html = "<html><head><title>Hello</title></head><body><p>content</p></body></html>";
        let ctx = test_ctx(&site);
        let plugin = JsonLdPlugin::from_site("https://example.com", "Org");
        let page_path = site.join("index.html");

        let out = plugin.transform_html(html, &page_path, &ctx).unwrap();
        assert!(out.contains("application/ld+json"));
        assert!(out.contains("WebPage"));
    }

    #[test]
    fn jsonld_plugin_generates_article_when_article_element_present() {
        let dir = tempdir().unwrap();
        let site = dir.path().join("site");
        fs::create_dir_all(&site).unwrap();
        let html = "<html><head><title>Post</title></head><body><article><h1>Post</h1></article></body></html>";
        let ctx = test_ctx(&site);
        let plugin = JsonLdPlugin::from_site("https://example.com", "Org");
        let page_path = site.join("post.html");

        let out = plugin.transform_html(html, &page_path, &ctx).unwrap();
        assert!(out.contains("application/ld+json"));
        assert!(out.contains(r#""Article""#));
    }

    /// `JsonLdPlugin::new` keeps the config it is given, field for field.
    #[test]
    fn jsonld_plugin_new_stores_supplied_config() {
        let cfg = JsonLdConfig {
            base_url: "https://a".to_string(),
            org_name: "Org".to_string(),
            breadcrumbs: false,
        };
        let plugin = JsonLdPlugin::new(cfg);
        assert_eq!(plugin.config.base_url, "https://a");
        assert_eq!(plugin.config.org_name, "Org");
        assert!(!plugin.config.breadcrumbs);
    }

    #[test]
    fn jsonld_plugin_name_returns_static_identifier() {
        let plugin = JsonLdPlugin::from_site("https://example.com", "Org");
        assert_eq!(plugin.name(), "json-ld");
    }

    // -----------------------------------------------------------------
    // collect_html_files_recursive
    // -----------------------------------------------------------------

    // NOTE(review): only the number of collected files is asserted here; the
    // ordering implied by "sorts" in the test name is not checked.
    #[test]
    fn collect_html_files_recursive_filters_and_sorts() {
        let dir = tempdir().unwrap();
        let sub = dir.path().join("sub");
        fs::create_dir(&sub).unwrap();
        fs::write(dir.path().join("z.html"), "").unwrap();
        fs::write(dir.path().join("a.html"), "").unwrap();
        fs::write(sub.join("m.html"), "").unwrap();
        fs::write(dir.path().join("ignore.css"), "").unwrap();

        let files = collect_html_files_recursive(dir.path()).unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn collect_html_files_recursive_missing_dir_returns_empty() {
        let dir = tempdir().unwrap();
        let result =
            collect_html_files_recursive(&dir.path().join("missing")).unwrap();
        assert!(result.is_empty());
    }

    // -----------------------------------------------------------------
    // has_meta_tag
    // -----------------------------------------------------------------

    #[test]
    fn has_meta_tag_detects_name_double_quote() {
        let html = r#"<meta name="description" content="hello">"#;
        assert!(has_meta_tag(html, "description"));
    }

    #[test]
    fn has_meta_tag_detects_name_single_quote() {
        let html = "<meta name='description' content='hello'>";
        assert!(has_meta_tag(html, "description"));
    }

    #[test]
    fn has_meta_tag_detects_property_double_quote() {
        let html = r#"<meta property="og:title" content="T">"#;
        assert!(has_meta_tag(html, "og:title"));
    }

    #[test]
    fn has_meta_tag_detects_property_single_quote() {
        let html = "<meta property='og:title' content='T'>";
        assert!(has_meta_tag(html, "og:title"));
    }

    #[test]
    fn has_meta_tag_returns_false_when_absent() {
        let html = "<html><head></head></html>";
        assert!(!has_meta_tag(html, "description"));
    }

    /// HTML comments that merely mention Open Graph must not count as a tag.
    #[test]
    fn has_meta_tag_ignores_comment_markers() {
        let html = "<!-- # Start Open Graph / Facebook Meta Tags -->\n\
                    <!-- # End Open Graph / Facebook Meta Tags -->";
        assert!(!has_meta_tag(html, "og:title"));
    }

    // -----------------------------------------------------------------
    // extract_canonical
    // -----------------------------------------------------------------

    #[test]
    fn extract_canonical_finds_url() {
        let html = r#"<link rel="canonical" href="https://example.com/page">"#;
        assert_eq!(extract_canonical(html), "https://example.com/page");
    }

    #[test]
    fn extract_canonical_returns_empty_when_missing() {
        let html = "<html><head><title>No canonical</title></head></html>";
        assert_eq!(extract_canonical(html), "");
    }

    // -----------------------------------------------------------------
    // extract_existing_meta
    // -----------------------------------------------------------------

    #[test]
    fn extract_existing_meta_name_variant() {
        let html = r#"<meta name="author" content="Alice">"#;
        assert_eq!(extract_existing_meta(html, "author"), "Alice");
    }

    #[test]
    fn extract_existing_meta_property_variant() {
        let html =
            r#"<meta property="article:published_time" content="2026-01-01">"#;
        assert_eq!(
            extract_existing_meta(html, "article:published_time"),
            "2026-01-01"
        );
    }

    #[test]
    fn extract_existing_meta_single_quote_variant() {
        let html = "<meta name='author' content='Bob'>";
        assert_eq!(extract_existing_meta(html, "author"), "Bob");
    }

    #[test]
    fn extract_existing_meta_returns_empty_when_absent() {
        let html = "<html><head></head></html>";
        assert_eq!(extract_existing_meta(html, "author"), "");
    }

    // -----------------------------------------------------------------
    // extract_meta_author
    // -----------------------------------------------------------------

    #[test]
    fn extract_meta_author_from_meta_tag() {
        let html = r#"<meta name="author" content="Jane Doe">"#;
        assert_eq!(extract_meta_author(html), "Jane Doe");
    }

    #[test]
    fn extract_meta_author_from_class_author_span() {
        let html = r#"<span class="author">John Smith</span>"#;
        assert_eq!(extract_meta_author(html), "John Smith");
    }

    /// A leading "by " in the author span is not part of the returned name.
    #[test]
    fn extract_meta_author_strips_by_prefix() {
        let html = r#"<span class="author">by Alice Wonder</span>"#;
        assert_eq!(extract_meta_author(html), "Alice Wonder");
    }

    #[test]
    fn extract_meta_author_returns_empty_when_absent() {
        let html = "<html><body><p>No author</p></body></html>";
        assert_eq!(extract_meta_author(html), "");
    }

    // -----------------------------------------------------------------
    // extract_date_from_html
    // -----------------------------------------------------------------

    #[test]
    fn extract_date_from_html_finds_date_published() {
        let html = r#"<script type="application/ld+json">{"datePublished":"2026-03-15"}</script>"#;
        assert_eq!(
            extract_date_from_html(html, "datePublished"),
            Some("2026-03-15".to_string())
        );
    }

    #[test]
    fn extract_date_from_html_returns_none_when_absent() {
        let html = "<html><body></body></html>";
        assert_eq!(extract_date_from_html(html, "datePublished"), None);
    }

    // -----------------------------------------------------------------
    // extract_meta_date
    // -----------------------------------------------------------------

    #[test]
    fn extract_meta_date_from_published_time() {
        let html =
            r#"<meta property="article:published_time" content="2026-06-01">"#;
        assert_eq!(extract_meta_date(html), Some("2026-06-01".to_string()));
    }

    #[test]
    fn extract_meta_date_from_time_datetime() {
        let html = r#"<time datetime="2026-07-04">July 4</time>"#;
        assert_eq!(extract_meta_date(html), Some("2026-07-04".to_string()));
    }

    #[test]
    fn extract_meta_date_returns_none_when_absent() {
        let html = "<html><body><p>No date</p></body></html>";
        assert_eq!(extract_meta_date(html), None);
    }

    // -----------------------------------------------------------------
    // extract_html_lang
    // -----------------------------------------------------------------

    #[test]
    fn extract_html_lang_double_quotes() {
        let html = r#"<html lang="fr-FR"><head></head></html>"#;
        assert_eq!(extract_html_lang(html), "fr-FR");
    }

    #[test]
    fn extract_html_lang_single_quotes() {
        let html = "<html lang='de-DE'><head></head></html>";
        assert_eq!(extract_html_lang(html), "de-DE");
    }

    #[test]
    fn extract_html_lang_missing_returns_empty() {
        let html = "<html><head></head></html>";
        assert_eq!(extract_html_lang(html), "");
    }

    // -----------------------------------------------------------------
    // extract_first_content_image
    // -----------------------------------------------------------------

    #[test]
    fn extract_first_content_image_from_main() {
        let html = r#"<html><body><main><img src="/img/hero.jpg"></main></body></html>"#;
        assert_eq!(extract_first_content_image(html), "/img/hero.jpg");
    }

    #[test]
    fn extract_first_content_image_from_article() {
        let html = r#"<html><body><article><img src="/img/post.png"></article></body></html>"#;
        assert_eq!(extract_first_content_image(html), "/img/post.png");
    }

    #[test]
    fn extract_first_content_image_no_image_returns_empty() {
        let html = "<html><body><main><p>No images</p></main></body></html>";
        assert_eq!(extract_first_content_image(html), "");
    }

    /// An image outside `<main>`/`<article>` is not picked up.
    #[test]
    fn extract_first_content_image_no_main_or_article_returns_empty() {
        let html = r#"<html><body><div><img src="/img/sidebar.jpg"></div></body></html>"#;
        assert_eq!(extract_first_content_image(html), "");
    }

    // -----------------------------------------------------------------
    // SeoPlugin: article pages
    // -----------------------------------------------------------------

    #[test]
    fn inject_seo_tags_article_page_uses_large_image_card() -> Result<()> {
        let tmp = tempdir()?;
        let html = "<html><head><title>Blog Post</title></head>\
                    <body><article><p>Article content</p></article></body></html>";
        let ctx = test_ctx(tmp.path());

        let result =
            SeoPlugin.transform_html(html, Path::new("post.html"), &ctx)?;
        assert!(
            result.contains("content=\"summary_large_image\""),
            "article pages should use summary_large_image twitter card"
        );
        assert!(
            result.contains("content=\"article\""),
            "article pages should use og:type=article"
        );
        Ok(())
    }

    // -----------------------------------------------------------------
    // CanonicalPlugin: replacement of an existing link
    // -----------------------------------------------------------------

    /// An existing canonical link pointing elsewhere is replaced, not kept
    /// and not duplicated.
    #[test]
    fn canonical_plugin_replaces_not_skips_existing() -> Result<()> {
        let tmp = tempdir()?;
        let html = r#"<html><head><link rel="canonical" href="https://old.com/wrong"></head><body></body></html>"#;
        let plugin = CanonicalPlugin::new("https://correct.com");
        let ctx = test_ctx(tmp.path());
        let page_path = tmp.path().join("page.html");

        let result = plugin.transform_html(html, &page_path, &ctx)?;
        assert!(
            result.contains("https://correct.com/page.html"),
            "canonical should be replaced with correct URL"
        );
        assert!(
            !result.contains("https://old.com/wrong"),
            "old canonical should be removed"
        );
        assert_eq!(
            result.matches("canonical").count(),
            1,
            "should have exactly one canonical link"
        );
        Ok(())
    }
}