1use super::helpers::rfc2822_to_iso8601;
7use crate::plugin::{Plugin, PluginContext};
8use anyhow::Result;
9use std::path::Path;
10
/// Plugin that post-processes rendered HTML with a fixed set of markup
/// repairs.
///
/// The type is a stateless unit struct; all behaviour lives in its `Plugin`
/// implementation.
#[derive(Clone, Copy, Debug)]
pub struct HtmlFixPlugin;
18
19impl Plugin for HtmlFixPlugin {
20 fn name(&self) -> &'static str {
21 "html-fix"
22 }
23
24 fn has_transform(&self) -> bool {
25 true
26 }
27
28 fn transform_html(
29 &self,
30 html: &str,
31 _path: &Path,
32 _ctx: &PluginContext,
33 ) -> Result<String> {
34 Ok(apply_html_fixes(html))
35 }
36
37 fn after_compile(&self, _ctx: &PluginContext) -> Result<()> {
38 Ok(())
39 }
40}
41
42fn apply_html_fixes(html: &str) -> String {
44 let mut modified = html.to_string();
45
46 if needs_schema_context_fix(&modified) {
47 modified = modified
48 .replace("\"http://schema.org/\"", "\"https://schema.org\"")
49 .replace("\"http://schema.org\"", "\"https://schema.org\"");
50 }
51
52 if modified.contains("application/ld+json") {
53 modified = fix_jsonld_dates(&modified);
54 }
55
56 if modified.contains("<p src=") {
57 modified = fix_broken_img_tags(&modified);
58 }
59
60 if needs_class_syntax_fix(&modified) {
61 modified = fix_literal_class_syntax(&modified);
62 }
63
64 if needs_mobile_web_app_capable_meta(&modified) {
65 modified = inject_mobile_web_app_capable_meta(&modified);
66 }
67
68 if has_empty_preload(&modified) {
69 modified = remove_empty_preload_links(&modified);
70 }
71
72 modified
73}
74
/// Returns `true` when `html` contains a quoted `http://schema.org` URL, with
/// or without a trailing slash.
fn needs_schema_context_fix(html: &str) -> bool {
    const INSECURE_CONTEXTS: [&str; 2] =
        ["\"http://schema.org/\"", "\"http://schema.org\""];
    INSECURE_CONTEXTS
        .iter()
        .any(|needle| html.contains(*needle))
}
80
/// Returns `true` when `html` contains a literal `.class="..."` marker, either
/// with HTML-encoded quotes (`.class=&quot;`) or with plain double quotes.
fn needs_class_syntax_fix(html: &str) -> bool {
    // The encoded form must be spelled with the `&quot;` entity; a bare `"`
    // here would terminate the Rust string literal early.
    html.contains(".class=&quot;") || html.contains(".class=\"")
}
85
/// Cheap whole-document pre-check for "there may be a preload link with an
/// empty `href`".
///
/// The two conditions are tested independently anywhere in `html`, so a
/// `true` result only means the per-tag scan is worth running; the preload
/// marker and the empty `href` may belong to different elements.
fn has_empty_preload(html: &str) -> bool {
    const PRELOAD_MARKERS: [&str; 3] =
        ["rel=preload", "rel=\"preload\"", "rel='preload'"];
    const EMPTY_HREF_MARKERS: [&str; 5] =
        ["href=\"\"", "href=''", " href ", " href>", " href/>"];

    let contains_any =
        |markers: &[&str]| markers.iter().any(|marker| html.contains(*marker));

    contains_any(&PRELOAD_MARKERS[..]) && contains_any(&EMPTY_HREF_MARKERS[..])
}
106
107pub(super) fn remove_empty_preload_links(html: &str) -> String {
110 let mut out = String::with_capacity(html.len());
111 let mut cursor = 0;
112 while cursor < html.len() {
113 let Some(rel_offset) =
115 html[cursor..].to_ascii_lowercase().find("<link")
116 else {
117 out.push_str(&html[cursor..]);
118 break;
119 };
120 let tag_start = cursor + rel_offset;
121 out.push_str(&html[cursor..tag_start]);
122
123 let bytes = html.as_bytes();
125 let mut j = tag_start;
126 let mut quote: Option<u8> = None;
127 while j < bytes.len() {
128 let b = bytes[j];
129 match quote {
130 Some(q) if b == q => quote = None,
131 Some(_) => {}
132 None => match b {
133 b'"' | b'\'' => quote = Some(b),
134 b'>' => break,
135 _ => {}
136 },
137 }
138 j += 1;
139 }
140 let tag_end = (j + 1).min(html.len());
141 let tag = &html[tag_start..tag_end];
142 let lower = tag.to_ascii_lowercase();
143 let is_preload = lower.contains("rel=\"preload\"")
144 || lower.contains("rel='preload'")
145 || lower.contains("rel=preload");
146 let has_real_href = href_is_present_and_non_empty(&lower);
147 if !is_preload || has_real_href {
149 out.push_str(tag);
150 }
151 cursor = tag_end;
152 }
153 out
154}
155
/// Returns `true` when the tag carries an `href` attribute with a non-empty
/// value.
///
/// `lower_tag` is the text of a single tag (the caller passes it lowercased).
/// The tag is scanned attribute by attribute, so names that merely contain
/// `href` (`hreflang`, `data-href`) are not mistaken for `href` itself. A bare
/// `href`, `href=""`, `href=''` and `href=` directly followed by `>` all count
/// as empty. When `href` appears more than once, the first occurrence decides.
fn href_is_present_and_non_empty(lower_tag: &str) -> bool {
    let bytes = lower_tag.as_bytes();
    let len = bytes.len();
    let is_space = |b: u8| b.is_ascii_whitespace();
    let mut i = 0;

    // Skip the element name (`<link`) so it is not read as an attribute.
    if bytes.first() == Some(&b'<') {
        while i < len && !(is_space(bytes[i]) || matches!(bytes[i], b'>' | b'/'))
        {
            i += 1;
        }
    }

    loop {
        // Skip separators between attributes.
        while i < len && (is_space(bytes[i]) || bytes[i] == b'/') {
            i += 1;
        }
        if i >= len || bytes[i] == b'>' {
            return false;
        }

        // Attribute name. Always consume at least one byte so that a stray
        // `=` at name position cannot stall the scan.
        let name_start = i;
        i += 1;
        while i < len
            && !(is_space(bytes[i]) || matches!(bytes[i], b'=' | b'>' | b'/'))
        {
            i += 1;
        }
        let name = &bytes[name_start..i];

        while i < len && is_space(bytes[i]) {
            i += 1;
        }

        // Optional `= value`; `None` means a bare attribute.
        let mut value_len: Option<usize> = None;
        if i < len && bytes[i] == b'=' {
            i += 1;
            while i < len && is_space(bytes[i]) {
                i += 1;
            }
            match bytes.get(i).copied() {
                Some(q) if q == b'"' || q == b'\'' => {
                    i += 1;
                    let value_start = i;
                    while i < len && bytes[i] != q {
                        i += 1;
                    }
                    value_len = Some(i - value_start);
                    if i < len {
                        i += 1; // closing quote
                    }
                }
                _ => {
                    // Unquoted value: runs until whitespace or `>`.
                    let value_start = i;
                    while i < len && !(is_space(bytes[i]) || bytes[i] == b'>') {
                        i += 1;
                    }
                    value_len = Some(i - value_start);
                }
            }
        }

        if name.eq_ignore_ascii_case(b"href") {
            return matches!(value_len, Some(n) if n > 0);
        }
    }
}
179
180fn needs_mobile_web_app_capable_meta(html: &str) -> bool {
186 let has_legacy = html.contains("apple-mobile-web-app-capable");
187 let has_modern = find_modern_mobile_web_app_capable(html).is_some();
188 has_legacy && !has_modern
189}
190
/// Returns the byte offset of a `name=mobile-web-app-capable` attribute
/// (double-quoted, single-quoted or unquoted), or `None` if there is none.
///
/// The spellings are tried in that order and the first spelling that occurs
/// anywhere in `html` wins, so the result is not necessarily the earliest
/// occurrence in the document.
fn find_modern_mobile_web_app_capable(html: &str) -> Option<usize> {
    const MODERN_NAME_ATTRS: [&str; 3] = [
        "name=\"mobile-web-app-capable\"",
        "name='mobile-web-app-capable'",
        "name=mobile-web-app-capable",
    ];
    MODERN_NAME_ATTRS
        .iter()
        .find_map(|attr| html.find(*attr))
}
209
210pub(super) fn inject_mobile_web_app_capable_meta(html: &str) -> String {
215 let modern = "<meta name=\"mobile-web-app-capable\" content=\"yes\">";
216 let candidates = [
218 "name=\"apple-mobile-web-app-capable\"",
219 "name='apple-mobile-web-app-capable'",
220 "name=apple-mobile-web-app-capable",
221 ];
222 let name_pos = candidates.iter().find_map(|n| html.find(n));
223 let Some(name_pos) = name_pos else {
224 return html.to_string();
225 };
226 let after = &html[name_pos..];
228 let Some(rel_close) = after.find('>') else {
229 return html.to_string();
230 };
231 let insert_at = name_pos + rel_close + 1;
232 format!("{}{modern}{}", &html[..insert_at], &html[insert_at..])
233}
234
235pub(super) fn fix_jsonld_dates(html: &str) -> String {
237 let mut result = html.to_string();
238
239 for field in &["datePublished", "dateModified"] {
241 let pattern = format!("\"{field}\":\"");
242 let mut search_from = 0;
243 while let Some(start) = result[search_from..].find(&pattern) {
244 let abs_start = search_from + start + pattern.len();
245 if let Some(end) = result[abs_start..].find('"') {
246 let date_str = &result[abs_start..abs_start + end];
247 if date_str.len() > 5
250 && date_str.as_bytes()[3] == b','
251 && date_str.as_bytes()[0].is_ascii_alphabetic()
252 {
253 let iso = rfc2822_to_iso8601(date_str);
254 if iso != date_str {
255 result = format!(
256 "{}{}{}",
257 &result[..abs_start],
258 iso,
259 &result[abs_start + end..]
260 );
261 }
262 }
263 search_from = abs_start + 1;
264 } else {
265 break;
266 }
267 }
268 }
269
270 result
271}
272
273pub(super) fn fix_broken_img_tags(html: &str) -> String {
276 let mut result = html.to_string();
277 while let Some(p_pos) = result.find("<p src=") {
280 let before = &result[..p_pos];
282 if let Some(img_start) = before.rfind("<img") {
283 let after_p = &result[p_pos..]; if let Some(quote_start) = after_p.find("src=\"") {
286 let val_start = quote_start + 5; let remaining = &after_p[val_start..];
288 if let Some(quote_end) = remaining.find('"') {
289 let src_value = remaining[..quote_end].to_string();
290 let close_offset = remaining[quote_end..]
292 .find('>')
293 .map_or(result.len(), |i| {
294 p_pos + val_start + quote_end + i + 1
295 });
296
297 let img_attrs = result[img_start + 4..p_pos].trim();
299 let img_attrs_clean =
300 img_attrs.trim_end_matches(|c: char| {
301 c.is_whitespace() || c == '<'
302 });
303
304 let new_img = format!(
305 "<img {img_attrs_clean} src=\"{src_value}\" />"
306 );
307 result = format!(
308 "{}{}{}",
309 &result[..img_start],
310 new_img,
311 &result[close_offset..]
312 );
313 continue;
314 }
315 }
316 }
317 break;
319 }
320 result
321}
322
323pub(super) fn fix_literal_class_syntax(html: &str) -> String {
326 let mut result = html.to_string();
327
328 result = fix_class_syntax_variant(&result, ".class="", """);
330 result = fix_class_syntax_variant(&result, ".class=\"", "\"");
332
333 result
334}
335
336fn fix_class_syntax_variant(
338 html: &str,
339 open_pattern: &str,
340 close_pattern: &str,
341) -> String {
342 let mut result = html.to_string();
343 while let Some(start) = result.find(open_pattern) {
344 let after = &result[start + open_pattern.len()..];
345 if let Some(end) = after.find(close_pattern) {
346 let class_value = after[..end].to_string();
347 let remove_end =
348 start + open_pattern.len() + end + close_pattern.len();
349 result = format!("{}{}", &result[..start], &result[remove_end..]);
350 inject_class_attr(&mut result, start, &class_value);
351 } else {
352 break;
353 }
354 }
355 result
356}
357
/// Adds `class="class_value"` to the tag that ends immediately before `pos`.
///
/// `pos` is a byte offset into `html`. The tag is located by the last `>`
/// before `pos` and the last `<` before that `>`. Nothing is changed when no
/// such tag exists or when the tag text already contains `class=`.
///
/// For a self-closing tag (`<img ... />`) the attribute is placed before the
/// closing slash, so the result is `<img ... class="x" />` rather than
/// `<img ... / class="x">`.
fn inject_class_attr(html: &mut String, pos: usize, class_value: &str) {
    let Some(tag_end) = html[..pos].rfind('>') else {
        return;
    };
    let Some(tag_start) = html[..tag_end].rfind('<') else {
        return;
    };
    let tag = &html[tag_start..tag_end];
    if tag.contains("class=") {
        return;
    }

    let insert_pos = match tag.trim_end().strip_suffix('/') {
        Some(head) => {
            // A trailing `/` that directly ends an unquoted value
            // (`href=/docs/`) belongs to that value, not to the tag syntax.
            let last_token =
                head.rsplit(char::is_whitespace).next().unwrap_or(head);
            let slash_is_value = last_token.contains('=')
                && !last_token.ends_with(|c: char| c == '"' || c == '\'');
            if slash_is_value {
                tag_end
            } else {
                tag_start + head.trim_end().len()
            }
        }
        None => tag_end,
    };

    let attr = format!(" class=\"{class_value}\"");
    html.insert_str(insert_pos, &attr);
}
375
// Unit tests for the HTML repair passes. The `&quot;` entities below are
// intentional: they exercise the HTML-encoded spelling of the literal class
// marker and must not be replaced by plain `"` characters.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::plugin::PluginContext;
    use std::path::Path;
    use tempfile::tempdir;

    /// Builds a plugin context rooted at `site_dir` with fixed placeholder
    /// paths for the other directories.
    fn test_ctx(site_dir: &Path) -> PluginContext {
        crate::test_support::init_logger();
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site_dir,
            Path::new("templates"),
        )
    }

    #[test]
    fn test_html_fix_upgrades_jsonld_context() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());

        let html = r#"<html><head>
<script type="application/ld+json">
{"@context":"http://schema.org/","@type":"WebPage"}
</script>
</head><body></body></html>"#;

        let result = HtmlFixPlugin.transform_html(
            html,
            Path::new("index.html"),
            &ctx,
        )?;
        assert!(result.contains("\"https://schema.org\""));
        assert!(!result.contains("\"http://schema.org/\""));
        Ok(())
    }

    #[test]
    fn test_html_fix_converts_jsonld_dates() -> Result<()> {
        let tmp = tempdir()?;
        let ctx = test_ctx(tmp.path());

        let html = r#"<html><head>
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"Article","datePublished":"Thu, 11 Apr 2026 06:06:06 +0000","dateModified":"Mon, 01 Sep 2025 06:06:06 +0000"}
</script>
</head><body></body></html>"#;

        let result = HtmlFixPlugin.transform_html(
            html,
            Path::new("article.html"),
            &ctx,
        )?;
        assert!(
            result.contains("\"datePublished\":\"2026-04-11"),
            "Expected ISO date, got: {result}"
        );
        assert!(
            result.contains("\"dateModified\":\"2025-09-01"),
            "Expected ISO date, got: {result}"
        );
        assert!(!result.contains("Thu, 11 Apr"));
        Ok(())
    }

    #[test]
    fn test_fix_broken_img_tags() {
        let input =
            r#"<img alt="test" class="w-25" title="test" <p src="image.jpg">"#;
        let result = fix_broken_img_tags(input);
        assert!(result.contains("src=\"image.jpg\""));
        assert!(!result.contains("<p src="));
    }

    #[test]
    fn test_fix_literal_class_syntax() {
        let input = r#"<img alt="test" src="img.jpg">.class=&quot;w-25 float-start&quot;"#;
        let result = fix_literal_class_syntax(input);
        assert!(!result.contains(".class=&quot;"));
    }

    #[test]
    fn test_fix_jsonld_dates_iso_passthrough() {
        let input =
            r#"{"datePublished":"2026-04-11","dateModified":"2025-09-01"}"#;
        let result = fix_jsonld_dates(input);
        assert_eq!(result, input, "ISO dates should pass through unchanged");
    }

    #[test]
    fn test_fix_jsonld_dates_converts_rfc2822() {
        let input = r#"{"datePublished":"Thu, 11 Apr 2026 06:06:06 +0000"}"#;
        let result = fix_jsonld_dates(input);
        assert!(
            result.contains("\"datePublished\":\"2026-04-11T06:06:06+00:00\""),
            "Should convert RFC 2822 to ISO 8601, got: {result}"
        );
    }

    #[test]
    fn test_fix_jsonld_dates_both_fields() {
        let input = r#"{"datePublished":"Mon, 01 Sep 2025 12:00:00 +0000","dateModified":"Tue, 02 Sep 2025 14:30:00 +0000"}"#;
        let result = fix_jsonld_dates(input);
        assert!(result.contains("2025-09-01T12:00:00+00:00"));
        assert!(result.contains("2025-09-02T14:30:00+00:00"));
    }

    #[test]
    fn test_fix_broken_img_tags_multiple() {
        let input =
            r#"<img alt="a" <p src="one.jpg"><img alt="b" <p src="two.jpg">"#;
        let result = fix_broken_img_tags(input);
        assert!(result.contains("src=\"one.jpg\""), "first img: {result}");
        assert!(result.contains("src=\"two.jpg\""), "second img: {result}");
        assert!(
            !result.contains("<p src="),
            "no broken tags remain: {result}"
        );
    }

    #[test]
    fn test_fix_broken_img_tags_none() {
        let input = r#"<img alt="ok" src="good.jpg" />"#;
        let result = fix_broken_img_tags(input);
        assert_eq!(
            result, input,
            "No broken tags should leave input unchanged"
        );
    }

    #[test]
    fn test_fix_literal_class_syntax_html_encoded() {
        let input =
            r#"<img src="img.jpg">.class=&quot;w-25 float-start&quot; rest"#;
        let result = fix_literal_class_syntax(input);
        assert!(
            !result.contains(".class=&quot;"),
            "should remove .class=&quot;"
        );
        assert!(
            result.contains("class=\"w-25 float-start\""),
            "should inject class attr, got: {result}"
        );
    }

    #[test]
    fn test_fix_literal_class_syntax_literal_quotes() {
        let input = r#"<img src="img.jpg">.class="my-class" rest"#;
        let result = fix_literal_class_syntax(input);
        assert!(
            !result.contains(".class=\""),
            "should remove .class=\", got: {result}"
        );
        assert!(
            result.contains("class=\"my-class\""),
            "should inject class attr, got: {result}"
        );
    }

    #[test]
    fn test_fix_literal_class_syntax_no_class() {
        let input = r#"<img src="img.jpg"> some text"#;
        let result = fix_literal_class_syntax(input);
        assert_eq!(result, input, "No .class= should leave input unchanged");
    }

    #[test]
    fn test_inject_mobile_web_app_capable_meta_added() {
        let input = r#"<head><meta name="apple-mobile-web-app-capable" content="yes"></head>"#;
        let result = inject_mobile_web_app_capable_meta(input);
        assert!(
            result.contains(
                r#"<meta name="mobile-web-app-capable" content="yes">"#
            ),
            "modern meta should be injected, got: {result}"
        );
        assert!(
            result.contains(
                r#"<meta name="apple-mobile-web-app-capable" content="yes">"#
            ),
            "legacy meta must remain for backwards compatibility"
        );
    }

    #[test]
    fn test_remove_empty_preload_drops_bare_href() {
        let input = r#"<head><link as=image fetchpriority=high href rel=preload type=image/webp><title>x</title></head>"#;
        let result = remove_empty_preload_links(input);
        assert!(
            !result.contains("rel=preload"),
            "empty preload should be removed, got: {result}"
        );
        assert!(result.contains("<title>x</title>"), "rest preserved");
    }

    #[test]
    fn test_remove_empty_preload_drops_quoted_empty_href() {
        let input = r#"<link rel="preload" href="" as="image">"#;
        let result = remove_empty_preload_links(input);
        assert_eq!(result, "");
    }

    #[test]
    fn test_remove_empty_preload_keeps_valid_preload() {
        let input = r#"<link rel="preload" href="/banner.webp" as="image">"#;
        let result = remove_empty_preload_links(input);
        assert_eq!(result, input);
    }

    #[test]
    fn test_remove_empty_preload_preserves_utf8() {
        let input = r#"<title>日本語</title><link rel=preload href as=image><p>テスト</p>"#;
        let result = remove_empty_preload_links(input);
        assert!(result.contains("日本語"));
        assert!(result.contains("テスト"));
        assert!(!result.contains("rel=preload"));
    }

    #[test]
    fn test_apply_html_fixes_idempotent_on_modern_meta() {
        let input = r#"<head><meta name="apple-mobile-web-app-capable" content="yes"><meta name="mobile-web-app-capable" content="yes"></head>"#;
        let result = apply_html_fixes(input);
        let count = result.matches("name=\"mobile-web-app-capable\"").count();
        assert_eq!(count, 1, "no duplicate injection, got: {result}");
    }
}