1use super::helpers::{
7 extract_date_from_html, extract_description, extract_first_content_image,
8 extract_html_lang, extract_meta_author, extract_meta_date, extract_title,
9};
10use crate::plugin::{Plugin, PluginContext};
11use anyhow::Result;
12use std::path::Path;
13
/// Settings that drive JSON-LD generation for a site.
#[derive(Debug, Clone)]
pub struct JsonLdConfig {
    /// Base URL of the site; page and breadcrumb URLs are formed by
    /// appending site-relative paths to it (a trailing `/` is trimmed by the
    /// plugin before use).
    pub base_url: String,
    /// Name emitted as the `publisher` organization of `Article` documents.
    pub org_name: String,
    /// When `true`, a `BreadcrumbList` document is emitted next to the page
    /// document for every non-root page.
    pub breadcrumbs: bool,
}
24
/// Plugin that injects schema.org JSON-LD `<script>` blocks into the
/// `<head>` of generated HTML pages.
#[derive(Debug, Clone)]
pub struct JsonLdPlugin {
    /// Generation settings, visible crate-wide.
    pub(crate) config: JsonLdConfig,
}
37
38impl JsonLdPlugin {
39 #[must_use]
41 pub const fn new(config: JsonLdConfig) -> Self {
42 Self { config }
43 }
44
45 #[must_use]
47 pub fn from_site(base_url: &str, site_name: &str) -> Self {
48 Self {
49 config: JsonLdConfig {
50 base_url: base_url.to_string(),
51 org_name: site_name.to_string(),
52 breadcrumbs: true,
53 },
54 }
55 }
56}
57
58fn build_article_jsonld(
60 title: &str,
61 description: &str,
62 page_url: &str,
63 org_name: &str,
64 author_name: &str,
65 image_url: &str,
66 date_published: Option<&String>,
67 date_modified: Option<&String>,
68 lang: &str,
69) -> serde_json::Value {
70 let mut article = serde_json::json!({
71 "@context": "https://schema.org",
72 "@type": "Article",
73 "headline": title,
74 "description": description,
75 "url": page_url,
76 "inLanguage": if lang.is_empty() { "en" } else { lang },
77 "mainEntityOfPage": {
78 "@type": "WebPage",
79 "@id": page_url
80 },
81 "publisher": {
82 "@type": "Organization",
83 "name": org_name
84 }
85 });
86
87 if !author_name.is_empty() {
88 article["author"] = serde_json::json!({
89 "@type": "Person",
90 "name": author_name
91 });
92 }
93
94 if !image_url.is_empty() {
95 article["image"] = serde_json::json!({
96 "@type": "ImageObject",
97 "url": image_url
98 });
99 }
100
101 if let Some(dp) = date_published {
102 article["datePublished"] = serde_json::json!(dp);
103 }
104 if let Some(dm) = date_modified {
105 article["dateModified"] = serde_json::json!(dm);
106 } else if let Some(dp) = date_published {
107 article["dateModified"] = serde_json::json!(dp);
108 }
109
110 article
111}
112
113fn build_webpage_jsonld(
115 title: &str,
116 description: &str,
117 page_url: &str,
118 author_name: &str,
119 image_url: &str,
120 date_published: Option<&String>,
121 lang: &str,
122) -> serde_json::Value {
123 let mut webpage = serde_json::json!({
124 "@context": "https://schema.org",
125 "@type": "WebPage",
126 "name": title,
127 "description": description,
128 "url": page_url,
129 "inLanguage": if lang.is_empty() { "en" } else { lang }
130 });
131
132 if !author_name.is_empty() {
133 webpage["author"] = serde_json::json!({
134 "@type": "Person",
135 "name": author_name
136 });
137 }
138
139 if !image_url.is_empty() {
140 webpage["image"] = serde_json::json!({
141 "@type": "ImageObject",
142 "url": image_url
143 });
144 }
145
146 if let Some(dp) = date_published {
147 webpage["datePublished"] = serde_json::json!(dp);
148 }
149
150 webpage
151}
152
153fn build_breadcrumb_jsonld(
155 base: &str,
156 rel_path: &str,
157) -> Option<serde_json::Value> {
158 let parts: Vec<&str> = rel_path
159 .trim_matches('/')
160 .split('/')
161 .filter(|p| !p.is_empty() && *p != "index.html")
162 .collect();
163
164 if parts.is_empty() {
165 return None;
166 }
167
168 let mut items = vec![serde_json::json!({
169 "@type": "ListItem",
170 "position": 1,
171 "name": "Home",
172 "item": format!("{}/", base)
173 })];
174
175 let mut accumulated = String::new();
176 for (i, part) in parts.iter().enumerate() {
177 accumulated = format!("{accumulated}/{part}");
178 let name = part.trim_end_matches(".html").replace('-', " ");
179 items.push(serde_json::json!({
180 "@type": "ListItem",
181 "position": i + 2,
182 "name": name,
183 "item": format!("{}{}", base, accumulated)
184 }));
185 }
186
187 Some(serde_json::json!({
188 "@context": "https://schema.org",
189 "@type": "BreadcrumbList",
190 "itemListElement": items
191 }))
192}
193
194fn build_jsonld_scripts(
196 html: &str,
197 base: &str,
198 rel_path: &str,
199 org_name: &str,
200 breadcrumbs: bool,
201) -> Vec<serde_json::Value> {
202 let title = extract_title(html);
203 let description = extract_description(html, 160);
204 let page_url = format!("{base}/{rel_path}");
205 let author_name = extract_meta_author(html);
206 let image_url = extract_first_content_image(html);
207 let date_published = extract_date_from_html(html, "datePublished")
208 .or_else(|| extract_meta_date(html));
209 let date_modified = extract_date_from_html(html, "dateModified");
210 let lang = extract_html_lang(html);
211
212 let mut scripts = Vec::new();
213
214 if html.contains("<article") {
215 scripts.push(build_article_jsonld(
216 &title,
217 &description,
218 &page_url,
219 org_name,
220 &author_name,
221 &image_url,
222 date_published.as_ref(),
223 date_modified.as_ref(),
224 &lang,
225 ));
226 } else {
227 scripts.push(build_webpage_jsonld(
228 &title,
229 &description,
230 &page_url,
231 &author_name,
232 &image_url,
233 date_published.as_ref(),
234 &lang,
235 ));
236 }
237
238 if breadcrumbs {
239 if let Some(breadcrumb) = build_breadcrumb_jsonld(base, rel_path) {
240 scripts.push(breadcrumb);
241 }
242 }
243
244 scripts
245}
246
247impl Plugin for JsonLdPlugin {
248 fn name(&self) -> &'static str {
249 "json-ld"
250 }
251
252 fn has_transform(&self) -> bool {
253 true
254 }
255
256 fn transform_html(
257 &self,
258 html: &str,
259 path: &Path,
260 ctx: &PluginContext,
261 ) -> Result<String> {
262 if html.contains("application/ld+json") {
263 return Ok(html.to_string());
264 }
265
266 let Some(head_pos) = html.find("</head>") else {
267 return Ok(html.to_string());
268 };
269
270 let base = self.config.base_url.trim_end_matches('/');
271 let site_dir = &ctx.site_dir;
272
273 let rel_path = path
274 .strip_prefix(site_dir)
275 .unwrap_or(path)
276 .to_string_lossy()
277 .replace('\\', "/");
278
279 let scripts = build_jsonld_scripts(
280 html,
281 base,
282 &rel_path,
283 &self.config.org_name,
284 self.config.breadcrumbs,
285 );
286
287 let mut injection = String::new();
288 for script in &scripts {
289 let json = serde_json::to_string(script)?;
290 injection.push_str(&format!(
291 "<script type=\"application/ld+json\">{json}</script>\n"
292 ));
293 }
294
295 let result =
296 format!("{}{}{}", &html[..head_pos], injection, &html[head_pos..]);
297 Ok(result)
298 }
299
300 fn after_compile(&self, _ctx: &PluginContext) -> Result<()> {
301 Ok(())
302 }
303}
304
/// A single problem found while validating a JSON-LD block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JsonLdValidationError {
    /// `@type` of the offending document, or `"Unparseable"` when the
    /// payload was not valid JSON.
    pub schema_type: String,
    /// Name (or indexed path) of the offending field; `"(payload)"` for
    /// parse failures.
    pub field: String,
    /// Human-readable explanation of what is wrong with the field.
    pub reason: String,
}
319
320impl std::fmt::Display for JsonLdValidationError {
321 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
322 write!(
323 f,
324 "[{}] missing/invalid `{}` — {}",
325 self.schema_type, self.field, self.reason
326 )
327 }
328}
329
330#[must_use]
349pub fn validate_jsonld(html: &str) -> Vec<JsonLdValidationError> {
350 let mut errors = Vec::new();
351
352 for block in extract_jsonld_blocks(html) {
353 match serde_json::from_str::<serde_json::Value>(&block) {
354 Ok(value) => validate_one(&value, &mut errors),
355 Err(parse_err) => {
356 errors.push(JsonLdValidationError {
357 schema_type: "Unparseable".to_string(),
358 field: "(payload)".to_string(),
359 reason: format!("invalid JSON: {parse_err}"),
360 });
361 }
362 }
363 }
364
365 errors
366}
367
368fn extract_jsonld_blocks(html: &str) -> Vec<String> {
382 let mut blocks = Vec::new();
383 let lower = html.to_lowercase();
384 let mut cursor = 0;
385
386 while let Some(rel_open) = lower[cursor..].find("<script") {
387 let abs_open = cursor + rel_open;
388 let tag_end = find_html_tag_end(&lower, abs_open);
393 let tag = &lower[abs_open..tag_end];
394 cursor = tag_end;
395
396 if !is_jsonld_script_tag(tag) {
397 continue;
398 }
399
400 let Some(close) = find_script_close_skipping_strings(&html[cursor..])
401 else {
402 break;
403 };
404 blocks.push(html[cursor..cursor + close].trim().to_string());
407 cursor += close + "</script>".len();
408 }
409
410 blocks
411}
412
413fn is_jsonld_script_tag(tag: &str) -> bool {
417 extract_attr(tag, "type")
418 .is_some_and(|v| v.eq_ignore_ascii_case("application/ld+json"))
419}
420
/// Extracts the value of attribute `name` from the text of an opening tag.
///
/// The name is matched ASCII case-insensitively and must be preceded by
/// whitespace or `<` (or sit at the very start of `tag`), so `type` does not
/// match inside `data-type`. Double-quoted, single-quoted and unquoted
/// values are supported; the value is returned with its original case.
///
/// Returns `None` when the attribute is absent or when a quoted value is
/// missing its closing quote.
///
/// Known limitation: the scan does not tokenise attributes, so text of the
/// form ` name=` inside another attribute's quoted value can still match.
fn extract_attr(tag: &str, name: &str) -> Option<String> {
    // ASCII lowercasing never changes byte lengths, so offsets found in
    // `lower` are valid offsets into `tag`. Full Unicode lowercasing can grow
    // or shrink characters and would misalign the slice taken below.
    let lower = tag.to_ascii_lowercase();
    let needle = format!("{}=", name.to_ascii_lowercase());
    let bytes = lower.as_bytes();

    // Examine every occurrence rather than only the first: an earlier
    // attribute such as `data-type=` must not hide a later genuine `type=`.
    let mut search_from = 0;
    let idx = loop {
        let found = search_from + lower[search_from..].find(&needle)?;
        let boundary_ok =
            match found.checked_sub(1).and_then(|prev| bytes.get(prev)) {
                None => true,
                Some(b) => b.is_ascii_whitespace() || *b == b'<',
            };
        if boundary_ok {
            break found;
        }
        search_from = found + needle.len();
    };

    let value = tag[idx + needle.len()..].trim_start();
    if let Some(s) = value.strip_prefix('"') {
        s.find('"').map(|end| s[..end].to_string())
    } else if let Some(s) = value.strip_prefix('\'') {
        s.find('\'').map(|end| s[..end].to_string())
    } else {
        // Unquoted values run until whitespace or the end of the tag.
        let end = value
            .find(|c: char| c.is_whitespace() || c == '>')
            .unwrap_or(value.len());
        Some(value[..end].to_string())
    }
}
449
/// Finds the byte offset of the first `</script>` (ASCII case-insensitive)
/// in `body` that is not inside a double-quoted JSON string.
///
/// Backslash escapes inside strings are honoured, so `\"` does not end a
/// string. Returns `None` when no such closing tag exists, including when a
/// string is left unterminated.
fn find_script_close_skipping_strings(body: &str) -> Option<usize> {
    const CLOSE_TAG: &[u8] = b"</script>";

    let raw = body.as_bytes();
    let mut inside_string = false;
    let mut escaped = false;

    for (pos, &byte) in raw.iter().enumerate() {
        if inside_string {
            if escaped {
                // The escaped byte is consumed verbatim.
                escaped = false;
            } else {
                match byte {
                    b'\\' => escaped = true,
                    b'"' => inside_string = false,
                    _ => {}
                }
            }
            continue;
        }

        if byte == b'"' {
            inside_string = true;
            continue;
        }

        // `get` yields `None` when fewer than `CLOSE_TAG.len()` bytes remain.
        let closes_here = raw
            .get(pos..pos + CLOSE_TAG.len())
            .is_some_and(|window| window.eq_ignore_ascii_case(CLOSE_TAG));
        if closes_here {
            return Some(pos);
        }
    }

    None
}
490
/// Returns the byte offset just past the `>` that closes the tag starting at
/// `tag_start`, ignoring any `>` inside single- or double-quoted attribute
/// values.
///
/// Returns `html.len()` when the tag is never closed or `tag_start` lies
/// beyond the end of the input.
fn find_html_tag_end(html: &str, tag_start: usize) -> usize {
    let bytes = html.as_bytes();
    let mut open_quote: Option<u8> = None;

    for (offset, &byte) in bytes.iter().enumerate().skip(tag_start) {
        match (open_quote, byte) {
            // Only the same quote character that opened the value closes it.
            (Some(quote), b) if b == quote => open_quote = None,
            (Some(_), _) => {}
            (None, b'"' | b'\'') => open_quote = Some(byte),
            (None, b'>') => return offset + 1,
            (None, _) => {}
        }
    }

    bytes.len()
}
513
514fn validate_one(
516 value: &serde_json::Value,
517 errors: &mut Vec<JsonLdValidationError>,
518) {
519 if let Some(graph) = value.get("@graph").and_then(|v| v.as_array()) {
521 for entry in graph {
522 validate_one(entry, errors);
523 }
524 return;
525 }
526
527 if let Some(array) = value.as_array() {
529 for entry in array {
530 validate_one(entry, errors);
531 }
532 return;
533 }
534
535 let schema_type = value
536 .get("@type")
537 .and_then(|v| v.as_str())
538 .unwrap_or("Unknown")
539 .to_string();
540
541 let required: &[&str] = match schema_type.as_str() {
548 "Article" | "NewsArticle" | "BlogPosting" => {
549 &["headline", "datePublished", "author", "image"]
552 }
553 "WebPage" => &["name"],
558 "BreadcrumbList" => &["itemListElement"],
559 "FAQPage" => &["mainEntity"],
560 "LocalBusiness" | "Restaurant" | "Store" => &["name", "address"],
561 "Organization" => &["name", "url"],
562 _ => return,
565 };
566
567 for field in required {
568 match value.get(*field) {
569 None => errors.push(JsonLdValidationError {
570 schema_type: schema_type.clone(),
571 field: (*field).to_string(),
572 reason: "field absent".to_string(),
573 }),
574 Some(serde_json::Value::Null) => {
575 errors.push(JsonLdValidationError {
576 schema_type: schema_type.clone(),
577 field: (*field).to_string(),
578 reason: "field is null".to_string(),
579 });
580 }
581 Some(serde_json::Value::String(s)) if s.trim().is_empty() => {
582 errors.push(JsonLdValidationError {
583 schema_type: schema_type.clone(),
584 field: (*field).to_string(),
585 reason: "field is empty string".to_string(),
586 });
587 }
588 Some(serde_json::Value::Array(a)) if a.is_empty() => {
589 errors.push(JsonLdValidationError {
590 schema_type: schema_type.clone(),
591 field: (*field).to_string(),
592 reason: "array is empty".to_string(),
593 });
594 }
595 _ => {}
596 }
597 }
598
599 if schema_type == "BreadcrumbList" {
602 if let Some(items) =
603 value.get("itemListElement").and_then(|v| v.as_array())
604 {
605 for (idx, item) in items.iter().enumerate() {
606 if item.get("position").is_none() {
607 errors.push(JsonLdValidationError {
608 schema_type: schema_type.clone(),
609 field: format!("itemListElement[{idx}].position"),
610 reason: "ListItem missing position".to_string(),
611 });
612 }
613 if item.get("name").is_none() && item.get("item").is_none() {
614 errors.push(JsonLdValidationError {
615 schema_type: schema_type.clone(),
616 field: format!("itemListElement[{idx}].name|item"),
617 reason: "ListItem missing name and item".to_string(),
618 });
619 }
620 }
621 }
622 }
623}
624
// Unit tests for the JSON-LD plugin, its document builders and the validator.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use std::path::Path;
    use tempfile::tempdir;

    // Builds a plugin context with fixed placeholder directories; `site` is
    // passed as the third argument (presumably the site directory — confirm
    // against `PluginContext::new`).
    fn ctx(site: &Path) -> PluginContext {
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site,
            Path::new("templates"),
        )
    }

    // Default configuration shared by the plugin-level tests.
    fn cfg() -> JsonLdConfig {
        JsonLdConfig {
            base_url: "https://example.com".to_string(),
            org_name: "Example Org".to_string(),
            breadcrumbs: true,
        }
    }

    // ---- plugin construction ------------------------------------------

    #[test]
    fn name_is_stable() {
        let p = JsonLdPlugin::new(cfg());
        assert_eq!(p.name(), "json-ld");
    }

    #[test]
    fn from_site_constructs_with_breadcrumbs_enabled() {
        let p = JsonLdPlugin::from_site("https://x.example", "X");
        assert_eq!(p.config.base_url, "https://x.example");
        assert_eq!(p.config.org_name, "X");
        assert!(p.config.breadcrumbs);
    }

    // ---- build_article_jsonld -----------------------------------------

    #[test]
    fn article_includes_author_when_provided() {
        let v = build_article_jsonld(
            "T",
            "D",
            "https://x/p",
            "Org",
            "Jane",
            "",
            None,
            None,
            "en",
        );
        assert_eq!(v["author"]["name"], "Jane");
        assert_eq!(v["author"]["@type"], "Person");
    }

    #[test]
    fn article_omits_author_when_empty() {
        let v = build_article_jsonld(
            "T",
            "D",
            "https://x/p",
            "Org",
            "",
            "",
            None,
            None,
            "en",
        );
        assert!(v.get("author").is_none());
    }

    #[test]
    fn article_includes_image_when_url_present() {
        let v = build_article_jsonld(
            "T",
            "D",
            "https://x/p",
            "Org",
            "",
            "https://x/img.png",
            None,
            None,
            "en",
        );
        assert_eq!(v["image"]["@type"], "ImageObject");
        assert_eq!(v["image"]["url"], "https://x/img.png");
    }

    #[test]
    fn article_uses_date_published_for_date_modified_fallback() {
        let dp = "2025-01-01".to_string();
        let v = build_article_jsonld(
            "T",
            "D",
            "https://x/p",
            "Org",
            "",
            "",
            Some(&dp),
            None,
            "en",
        );
        assert_eq!(v["datePublished"], "2025-01-01");
        assert_eq!(
            v["dateModified"], "2025-01-01",
            "missing dateModified should fall back to datePublished"
        );
    }

    #[test]
    fn article_keeps_distinct_date_modified() {
        let dp = "2025-01-01".to_string();
        let dm = "2025-06-15".to_string();
        let v = build_article_jsonld(
            "T",
            "D",
            "https://x/p",
            "Org",
            "",
            "",
            Some(&dp),
            Some(&dm),
            "en",
        );
        assert_eq!(v["datePublished"], "2025-01-01");
        assert_eq!(v["dateModified"], "2025-06-15");
    }

    #[test]
    fn article_defaults_lang_to_en_when_empty() {
        let v = build_article_jsonld(
            "T",
            "D",
            "https://x/p",
            "Org",
            "",
            "",
            None,
            None,
            "",
        );
        assert_eq!(v["inLanguage"], "en");
    }

    // ---- build_webpage_jsonld -----------------------------------------

    #[test]
    fn webpage_includes_author_image_date_when_present() {
        let dp = "2025-01-01".to_string();
        let v = build_webpage_jsonld(
            "T",
            "D",
            "https://x/p",
            "Jane",
            "https://x/i.png",
            Some(&dp),
            "fr",
        );
        assert_eq!(v["@type"], "WebPage");
        assert_eq!(v["author"]["name"], "Jane");
        assert_eq!(v["image"]["url"], "https://x/i.png");
        assert_eq!(v["datePublished"], "2025-01-01");
        assert_eq!(v["inLanguage"], "fr");
    }

    #[test]
    fn webpage_omits_optional_fields_when_empty() {
        let v = build_webpage_jsonld("T", "D", "https://x/p", "", "", None, "");
        assert!(v.get("author").is_none());
        assert!(v.get("image").is_none());
        assert!(v.get("datePublished").is_none());
        assert_eq!(v["inLanguage"], "en");
    }

    // ---- build_breadcrumb_jsonld --------------------------------------

    #[test]
    fn breadcrumb_returns_none_for_root_path() {
        assert!(build_breadcrumb_jsonld("https://x", "/").is_none());
        // A bare `index.html` is filtered out, leaving no segments.
        assert!(build_breadcrumb_jsonld("https://x", "index.html").is_none());
    }

    #[test]
    fn breadcrumb_builds_chain_for_nested_path() {
        let v = build_breadcrumb_jsonld("https://x", "blog/my-post/index.html")
            .expect("should produce breadcrumb for nested path");
        assert_eq!(v["@type"], "BreadcrumbList");
        let items = v["itemListElement"].as_array().unwrap();
        // Home + "blog" + "my-post"; the trailing `index.html` is dropped.
        assert_eq!(items.len(), 3);
        assert_eq!(items[0]["name"], "Home");
        assert_eq!(items[1]["name"], "blog");
        // Hyphens in a segment become spaces in the display name.
        assert_eq!(items[2]["name"], "my post");
    }

    #[test]
    fn breadcrumb_handles_html_extension_in_part_name() {
        let v = build_breadcrumb_jsonld("https://x", "page.html").unwrap();
        let items = v["itemListElement"].as_array().unwrap();
        assert_eq!(items.len(), 2);
        assert_eq!(items[1]["name"], "page");
    }

    // ---- build_jsonld_scripts -----------------------------------------

    #[test]
    fn build_scripts_picks_article_when_article_tag_present() {
        let html = r#"<html><head><title>Post</title></head>
            <body><article>content</article></body></html>"#;
        let scripts =
            build_jsonld_scripts(html, "https://x", "p/", "Org", false);
        assert_eq!(scripts[0]["@type"], "Article");
    }

    #[test]
    fn build_scripts_picks_webpage_when_no_article_tag() {
        let html = "<html><head><title>P</title></head><body>x</body></html>";
        let scripts =
            build_jsonld_scripts(html, "https://x", "p/", "Org", false);
        assert_eq!(scripts[0]["@type"], "WebPage");
    }

    #[test]
    fn build_scripts_includes_breadcrumb_when_enabled() {
        let html = "<html><head><title>P</title></head><body>x</body></html>";
        let scripts =
            build_jsonld_scripts(html, "https://x", "blog/post/", "Org", true);
        assert!(
            scripts.iter().any(|s| s["@type"] == "BreadcrumbList"),
            "breadcrumb should be present when enabled and path nested"
        );
    }

    #[test]
    fn build_scripts_skips_breadcrumb_when_disabled() {
        let html = "<html><head><title>P</title></head><body>x</body></html>";
        let scripts =
            build_jsonld_scripts(html, "https://x", "blog/post/", "Org", false);
        assert!(!scripts.iter().any(|s| s["@type"] == "BreadcrumbList"));
    }

    // ---- Plugin trait behaviour ---------------------------------------

    #[test]
    fn after_compile_no_op_when_site_missing() {
        let dir = tempdir().unwrap();
        let nope = dir.path().join("nope");
        JsonLdPlugin::new(cfg()).after_compile(&ctx(&nope)).unwrap();
    }

    #[test]
    fn transform_html_injects_jsonld() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let html = "<html><head><title>X</title></head><body>x</body></html>";
        let page_path = dir.path().join("index.html");
        let after = JsonLdPlugin::new(cfg())
            .transform_html(html, &page_path, &c)
            .unwrap();
        assert!(after.contains("application/ld+json"));
        assert!(after.contains("\"@type\":\"WebPage\""));
    }

    #[test]
    fn transform_html_skips_existing_jsonld() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let html = r#"<html><head><script type="application/ld+json">{"@type":"X"}</script><title>X</title></head></html>"#;
        let page_path = dir.path().join("p.html");
        let after = JsonLdPlugin::new(cfg())
            .transform_html(html, &page_path, &c)
            .unwrap();
        // Exactly one block: the pre-existing one, with nothing injected.
        assert_eq!(after.matches("application/ld+json").count(), 1);
        assert!(after.contains(r#"{"@type":"X"}"#));
    }

    #[test]
    fn transform_html_skips_without_head_tag() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let raw = "<!doctype html><html><body>only</body></html>";
        let page_path = dir.path().join("frag.html");
        let after = JsonLdPlugin::new(cfg())
            .transform_html(raw, &page_path, &c)
            .unwrap();
        assert_eq!(after, raw);
    }

    // ---- validate_jsonld ----------------------------------------------

    #[test]
    fn validate_extracts_block() {
        let html = r#"<html><head>
            <script type="application/ld+json">
            {"@context":"https://schema.org","@type":"WebPage",
             "name":"Hi","url":"https://x.test/","inLanguage":"en"}
            </script></head><body></body></html>"#;
        assert!(validate_jsonld(html).is_empty());
    }

    #[test]
    fn validate_flags_missing_required_field_on_article() {
        let html = r#"<script type="application/ld+json">
            {"@context":"https://schema.org","@type":"Article",
             "headline":"H","datePublished":"2026-05-10","author":"A"}
            </script>"#;
        let errs = validate_jsonld(html);
        assert!(
            errs.iter()
                .any(|e| e.schema_type == "Article" && e.field == "image"),
            "expected Article.image violation, got {errs:?}"
        );
    }

    #[test]
    fn validate_flags_empty_breadcrumb_list() {
        let html = r#"<script type="application/ld+json">
            {"@context":"https://schema.org","@type":"BreadcrumbList",
             "itemListElement":[]}
            </script>"#;
        let errs = validate_jsonld(html);
        assert!(
            errs.iter().any(|e| e.field == "itemListElement"),
            "expected itemListElement empty-array error, got {errs:?}"
        );
    }

    #[test]
    fn validate_breadcrumb_listitem_missing_position() {
        let html = r#"<script type="application/ld+json">
            {"@type":"BreadcrumbList",
             "itemListElement":[{"name":"Home","item":"https://x/"}]}
            </script>"#;
        let errs = validate_jsonld(html);
        assert!(
            errs.iter()
                .any(|e| e.field == "itemListElement[0].position"),
            "expected position-missing error, got {errs:?}"
        );
    }

    #[test]
    fn validate_unparseable_json() {
        let html = r#"<script type="application/ld+json">{not json}</script>"#;
        let errs = validate_jsonld(html);
        assert_eq!(errs.len(), 1);
        assert_eq!(errs[0].schema_type, "Unparseable");
    }

    #[test]
    fn validate_descends_into_graph() {
        // The Article sits inside `@graph`; its missing fields must still be
        // reported.
        let html = r#"<script type="application/ld+json">
            {"@context":"https://schema.org","@graph":[
                {"@type":"Article","headline":"H"}
            ]}
            </script>"#;
        let errs = validate_jsonld(html);
        assert!(errs
            .iter()
            .any(|e| e.schema_type == "Article" && e.field == "datePublished"));
        assert!(errs
            .iter()
            .any(|e| e.schema_type == "Article" && e.field == "author"));
        assert!(errs
            .iter()
            .any(|e| e.schema_type == "Article" && e.field == "image"));
    }

    #[test]
    fn validate_unknown_type_passes_through() {
        let html = r#"<script type="application/ld+json">
            {"@type":"CustomThing","foo":"bar"}
            </script>"#;
        assert!(validate_jsonld(html).is_empty());
    }

    #[test]
    fn validate_handles_multiple_blocks() {
        let html = r#"
            <script type="application/ld+json">{"@type":"Organization","name":"O","url":"https://o/"}</script>
            <script type="application/ld+json">{"@type":"Article","headline":"H"}</script>
            "#;
        let errs = validate_jsonld(html);
        // The complete Organization block produces no findings.
        assert_eq!(
            errs.iter()
                .filter(|e| e.schema_type == "Organization")
                .count(),
            0
        );
        // The Article lacks datePublished, author and image.
        assert!(
            errs.iter().filter(|e| e.schema_type == "Article").count() >= 3
        );
    }

    // ---- script-tag recognition ---------------------------------------

    #[test]
    fn validate_skips_extra_qualified_type() {
        // The media type must match exactly; a longer value that merely
        // starts with `application/ld+json` is not JSON-LD.
        let html = r#"<script type="application/ld+json/extra">
            {"@type":"Article"}
            </script>"#;
        assert!(
            validate_jsonld(html).is_empty(),
            "non-JSON-LD type must not be validated"
        );
    }

    #[test]
    fn validate_recognises_type_with_single_quotes() {
        let html = r#"<script type='application/ld+json'>
            {"@type":"Organization","name":"O","url":"https://o/"}
            </script>"#;
        assert!(validate_jsonld(html).is_empty());
    }

    #[test]
    fn validate_recognises_type_after_other_attrs() {
        let html = r#"<script id="ld1" type="application/ld+json">
            {"@type":"Organization","name":"O","url":"https://o/"}
            </script>"#;
        assert!(validate_jsonld(html).is_empty());
    }

    #[test]
    fn validate_handles_close_script_inside_json_string() {
        // A `</script>` inside a JSON string must not end the block early
        // for the validator's extractor.
        let html = r#"<script type="application/ld+json">
            {"@type":"Article",
             "headline":"H","datePublished":"2026-01-01",
             "author":"A","image":"https://x/i.png",
             "description":"this contains a </script> inside the string and is still valid JSON"}
            </script>"#;
        let errs = validate_jsonld(html);
        assert!(
            errs.iter().all(|e| e.schema_type != "Unparseable"),
            "no parse errors expected, got {errs:?}"
        );
    }

    // ---- extract_attr -------------------------------------------------

    #[test]
    fn extract_attr_returns_none_when_attribute_absent() {
        assert_eq!(extract_attr("<script src=x>", "type"), None);
    }

    #[test]
    fn extract_attr_handles_double_quoted_value() {
        assert_eq!(
            extract_attr(r#"<script type="application/ld+json">"#, "type"),
            Some("application/ld+json".to_string())
        );
    }

    #[test]
    fn extract_attr_rejects_substring_match_in_other_attribute() {
        // `type` must not match the tail of `data-mytype`.
        assert_eq!(extract_attr(r#"<script data-mytype="foo">"#, "type"), None);
    }
}