1use crate::plugin::{Plugin, PluginContext};
21use anyhow::Result;
22use std::{fs, path::Path, process::Command};
23
/// Settings shared by the LLM plugin and the readability audit/fix helpers.
#[derive(Clone, Debug)]
pub struct LlmConfig {
    /// Model name placed in the `model` field of each generate request.
    pub model: String,
    /// Base URL of the Ollama server; `/api/generate` is appended for requests.
    pub endpoint: String,
    /// When `true`, proposed changes are only logged and no file is written.
    pub dry_run: bool,
    /// Highest readability grade that still counts as passing.
    pub target_grade: f64,
    /// Upper bound on simplification passes run after the first generation.
    pub max_refinement_attempts: usize,
}
38
39impl Default for LlmConfig {
40 fn default() -> Self {
41 Self {
42 model: "llama3".to_string(),
43 endpoint: "http://localhost:11434".to_string(),
44 dry_run: false,
45 target_grade: 8.0,
46 max_refinement_attempts: 1,
47 }
48 }
49}
50
51#[derive(Debug)]
53pub struct LlmPlugin {
54 config: LlmConfig,
55}
56
57impl LlmPlugin {
58 #[must_use]
60 pub const fn new(config: LlmConfig) -> Self {
61 Self { config }
62 }
63}
64
65#[derive(Debug, Clone, serde::Serialize)]
67pub struct FileAuditResult {
68 pub path: String,
70 pub grade_level: f64,
72 pub reading_ease: f64,
74 pub avg_sentence_len: f64,
76 pub passes: bool,
78}
79
80#[derive(Debug, Clone, serde::Serialize)]
82pub struct AuditReport {
83 pub target_grade: f64,
85 pub total_files: usize,
87 pub passing: usize,
89 pub failing: usize,
91 pub results: Vec<FileAuditResult>,
93}
94
95#[derive(Debug, Clone, serde::Serialize)]
97pub struct AiFixResult {
98 pub path: String,
100 pub before_grade: f64,
102 pub after_grade: f64,
104 pub improved: bool,
106 pub action: String,
108}
109
110#[derive(Debug, Clone, serde::Serialize)]
112pub struct AiFixReport {
113 pub total_audited: usize,
115 pub total_failing: usize,
117 pub total_fixed: usize,
119 pub results: Vec<AiFixResult>,
121}
122
123impl LlmPlugin {
124 pub fn audit_all(
134 content_dir: &Path,
135 target_grade: f64,
136 ) -> Result<AuditReport> {
137 let md_files =
138 crate::walk::walk_files(content_dir, "md").unwrap_or_default();
139
140 let mut results = Vec::with_capacity(md_files.len());
141
142 for path in &md_files {
143 let Ok(content) = fs::read_to_string(path) else {
144 continue; };
146 let body = strip_frontmatter(&content);
148 let lang = extract_frontmatter_lang(&content);
150 let audit = ReadabilityAudit::analyze_with_lang(&body, &lang);
151 let rel = path
152 .strip_prefix(content_dir)
153 .unwrap_or(path)
154 .to_string_lossy()
155 .to_string();
156
157 results.push(FileAuditResult {
158 path: rel,
159 grade_level: (audit.grade_level * 10.0).round() / 10.0,
160 reading_ease: (audit.reading_ease * 10.0).round() / 10.0,
161 avg_sentence_len: (audit.avg_sentence_len * 10.0).round()
162 / 10.0,
163 passes: audit.grade_level <= target_grade,
164 });
165 }
166
167 let passing = results.iter().filter(|r| r.passes).count();
168 let failing = results.len() - passing;
169
170 Ok(AuditReport {
171 target_grade,
172 total_files: results.len(),
173 passing,
174 failing,
175 results,
176 })
177 }
178
179 pub fn audit_and_fix(
190 content_dir: &Path,
191 config: &LlmConfig,
192 ) -> Result<usize> {
193 if !is_ollama_available(&config.endpoint) {
194 log::warn!(
195 "[llm] Ollama not reachable at {}, skipping auto-fix",
196 config.endpoint
197 );
198 return Ok(0);
199 }
200
201 let report = Self::audit_all(content_dir, config.target_grade)?;
202 let failing: Vec<_> =
203 report.results.iter().filter(|r| !r.passes).collect();
204
205 if failing.is_empty() {
206 log::info!(
207 "[llm] All {} file(s) pass grade {:.0}",
208 report.total_files,
209 config.target_grade
210 );
211 return Ok(0);
212 }
213
214 log::info!(
215 "[llm] {} file(s) exceed grade {:.0}, attempting refinement",
216 failing.len(),
217 config.target_grade
218 );
219
220 let mut rewritten = 0usize;
221
222 for result in &failing {
223 let path = content_dir.join(&result.path);
224 let original = fs::read_to_string(&path)?;
225 let (frontmatter_block, body) = split_frontmatter(&original);
226 let body_trimmed = body.trim();
227
228 if body_trimmed.is_empty() {
229 continue;
230 }
231
232 let prompt = format!(
233 "Rewrite this Markdown content at a 6th-grade reading level. \
234 Rules:\n\
235 - Max 20 words per sentence\n\
236 - Max 4 sentences per paragraph\n\
237 - Use simple, common words\n\
238 - Keep ALL facts, numbers, dates, and code blocks exactly the same\n\
239 - Keep ALL Markdown headings (#, ##, ###) and formatting\n\
240 - Return ONLY the rewritten Markdown, nothing else\n\n\
241 {body_trimmed}"
242 );
243
244 if let Some(refined) = generate_with_refinement(
245 &config.endpoint,
246 &config.model,
247 &prompt,
248 config.target_grade,
249 config.max_refinement_attempts,
250 ) {
251 let refined_audit = ReadabilityAudit::analyze(&refined);
252 let original_audit = ReadabilityAudit::analyze(body_trimmed);
253
254 if refined_audit.grade_level < original_audit.grade_level {
255 if config.dry_run {
256 log::info!(
257 "[llm] [dry-run] {}: grade {:.1} → {:.1}",
258 result.path,
259 original_audit.grade_level,
260 refined_audit.grade_level
261 );
262 } else {
263 let output =
265 format!("{frontmatter_block}\n{refined}\n");
266 fs::write(&path, output)?;
267 log::info!(
268 "[llm] Rewrote {}: grade {:.1} → {:.1}",
269 result.path,
270 original_audit.grade_level,
271 refined_audit.grade_level
272 );
273 rewritten += 1;
274 }
275 } else {
276 log::warn!(
277 "[llm] Could not improve {}: grade {:.1} (refined: {:.1})",
278 result.path,
279 original_audit.grade_level,
280 refined_audit.grade_level
281 );
282 }
283 }
284 }
285
286 Ok(rewritten)
287 }
288
289 pub fn audit_and_fix_with_report(
294 content_dir: &Path,
295 config: &LlmConfig,
296 ) -> Result<AiFixReport> {
297 if !is_ollama_available(&config.endpoint) {
298 log::warn!(
299 "[ai-fix] Ollama not reachable at {}, skipping",
300 config.endpoint
301 );
302 return Ok(AiFixReport {
303 total_audited: 0,
304 total_failing: 0,
305 total_fixed: 0,
306 results: vec![],
307 });
308 }
309
310 let report = Self::audit_all(content_dir, config.target_grade)?;
311 let failing: Vec<_> =
312 report.results.iter().filter(|r| !r.passes).collect();
313 let mut fix_results = Vec::new();
314
315 for result in &failing {
316 let path = content_dir.join(&result.path);
317 let Ok(original) = fs::read_to_string(&path) else {
318 fix_results.push(AiFixResult {
319 path: result.path.clone(),
320 before_grade: result.grade_level,
321 after_grade: result.grade_level,
322 improved: false,
323 action: "skipped".to_string(),
324 });
325 continue;
326 };
327 let (frontmatter_block, body) = split_frontmatter(&original);
328 let body_trimmed = body.trim();
329
330 if body_trimmed.is_empty() {
331 fix_results.push(AiFixResult {
332 path: result.path.clone(),
333 before_grade: result.grade_level,
334 after_grade: result.grade_level,
335 improved: false,
336 action: "skipped".to_string(),
337 });
338 continue;
339 }
340
341 let prompt = format!(
342 "Rewrite this Markdown content at a 6th-grade reading level. \
343 Rules:\n\
344 - Max 20 words per sentence\n\
345 - Max 4 sentences per paragraph\n\
346 - Use simple, common words\n\
347 - Keep ALL facts, numbers, dates, and code blocks exactly the same\n\
348 - Keep ALL Markdown headings (#, ##, ###) and formatting\n\
349 - Return ONLY the rewritten Markdown, nothing else\n\n\
350 {body_trimmed}"
351 );
352
353 if let Some(refined) = generate_with_refinement(
354 &config.endpoint,
355 &config.model,
356 &prompt,
357 config.target_grade,
358 config.max_refinement_attempts,
359 ) {
360 let refined_audit = ReadabilityAudit::analyze(&refined);
361 let original_audit = ReadabilityAudit::analyze(body_trimmed);
362
363 if refined_audit.grade_level < original_audit.grade_level {
364 if !config.dry_run {
365 let output =
366 format!("{frontmatter_block}\n{refined}\n");
367 fs::write(&path, output)?;
368 }
369 fix_results.push(AiFixResult {
370 path: result.path.clone(),
371 before_grade: (original_audit.grade_level * 10.0)
372 .round()
373 / 10.0,
374 after_grade: (refined_audit.grade_level * 10.0).round()
375 / 10.0,
376 improved: true,
377 action: if config.dry_run {
378 "dry-run".to_string()
379 } else {
380 "rewritten".to_string()
381 },
382 });
383 } else {
384 fix_results.push(AiFixResult {
385 path: result.path.clone(),
386 before_grade: (original_audit.grade_level * 10.0)
387 .round()
388 / 10.0,
389 after_grade: (refined_audit.grade_level * 10.0).round()
390 / 10.0,
391 improved: false,
392 action: "no-improvement".to_string(),
393 });
394 }
395 } else {
396 fix_results.push(AiFixResult {
397 path: result.path.clone(),
398 before_grade: result.grade_level,
399 after_grade: result.grade_level,
400 improved: false,
401 action: "skipped".to_string(),
402 });
403 }
404 }
405
406 let total_fixed = fix_results.iter().filter(|r| r.improved).count();
407
408 Ok(AiFixReport {
409 total_audited: report.total_files,
410 total_failing: failing.len(),
411 total_fixed,
412 results: fix_results,
413 })
414 }
415}
416
/// Splits `content` into `(front matter, body)`.
///
/// Front matter is recognised when the first non-blank line is exactly `---`
/// or `+++` and a later line consists of the same delimiter. The returned
/// front matter keeps both delimiter lines plus any leading whitespace of the
/// file; the body is everything after the closing delimiter. A delimiter that
/// merely appears inside a value (e.g. `title: a --- b`) does not end the
/// block. Without front matter the result is `("", content)`.
fn split_frontmatter(content: &str) -> (String, String) {
    // Byte offset in `rest` of the closing delimiter line, where `rest` is
    // the text directly after the opening delimiter.
    fn closing_delimiter_offset(rest: &str, delim: &str) -> Option<usize> {
        let mut lines = rest.split_inclusive('\n');
        // The first segment is what follows the delimiter on the opening
        // line; anything but whitespace means this is not a delimiter line.
        let opening_tail = lines.next()?;
        if !opening_tail.trim().is_empty() {
            return None;
        }
        let mut offset = opening_tail.len();
        for line in lines {
            if line.trim_end() == delim {
                return Some(offset);
            }
            offset += line.len();
        }
        None
    }

    let trimmed = content.trim_start();
    let leading_ws = &content[..content.len() - trimmed.len()];

    for delim in ["---", "+++"] {
        let Some(rest) = trimmed.strip_prefix(delim) else {
            continue;
        };
        let Some(close) = closing_delimiter_offset(rest, delim) else {
            continue;
        };
        let fm_end = delim.len() + close + delim.len();
        return (
            format!("{leading_ws}{}", &trimmed[..fm_end]),
            trimmed[fm_end..].to_string(),
        );
    }

    (String::new(), content.to_string())
}
442
/// Returns the language declared in the front matter of `content`.
///
/// Looks for a `language` or `lang` key, first in `key: value` form on any
/// line, then in `key = value` form. Surrounding quotes are removed. The key
/// must match exactly, so `languages = [...]` is not mistaken for it. The
/// front matter ends at a line consisting only of the delimiter. Returns an
/// empty string when there is no front matter or no usable value.
fn extract_frontmatter_lang(content: &str) -> String {
    // Text between the opening and closing delimiter lines, if any.
    fn front_matter(content: &str) -> Option<&str> {
        let trimmed = content.trim_start();
        for delim in ["---", "+++"] {
            let Some(rest) = trimmed.strip_prefix(delim) else {
                continue;
            };
            let mut lines = rest.split_inclusive('\n');
            let Some(opening_tail) = lines.next() else {
                continue;
            };
            // The opening line must hold nothing but the delimiter.
            if !opening_tail.trim().is_empty() {
                continue;
            }
            let mut offset = opening_tail.len();
            for line in lines {
                if line.trim_end() == delim {
                    return Some(&rest[..offset]);
                }
                offset += line.len();
            }
        }
        None
    }

    // Trims whitespace and quotes; `None` when nothing is left.
    fn clean_value(raw: &str) -> Option<String> {
        let value = raw.trim().trim_matches('"').trim_matches('\'');
        if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        }
    }

    // First non-empty `language`/`lang` value using `separator`.
    fn lookup(fm: &str, separator: char) -> Option<String> {
        fm.lines().find_map(|line| {
            let (key, value) = line.trim().split_once(separator)?;
            if matches!(key.trim_end(), "language" | "lang") {
                clean_value(value)
            } else {
                None
            }
        })
    }

    front_matter(content)
        .and_then(|fm| lookup(fm, ':').or_else(|| lookup(fm, '=')))
        .unwrap_or_default()
}
485
/// Returns `content` without its leading front-matter block.
///
/// Front matter starts with a `---` or `+++` line and ends at the next line
/// consisting only of the same delimiter; a delimiter embedded in a value
/// does not end it. The result is the text after the closing delimiter, or
/// `content` unchanged when no front matter is found.
fn strip_frontmatter(content: &str) -> String {
    // Byte offset in `rest` of the closing delimiter line.
    fn closing_delimiter_offset(rest: &str, delim: &str) -> Option<usize> {
        let mut lines = rest.split_inclusive('\n');
        // Remainder of the opening line: must be blank.
        let opening_tail = lines.next()?;
        if !opening_tail.trim().is_empty() {
            return None;
        }
        let mut offset = opening_tail.len();
        for line in lines {
            if line.trim_end() == delim {
                return Some(offset);
            }
            offset += line.len();
        }
        None
    }

    let trimmed = content.trim_start();
    for delim in ["---", "+++"] {
        let Some(rest) = trimmed.strip_prefix(delim) else {
            continue;
        };
        if let Some(close) = closing_delimiter_offset(rest, delim) {
            return rest[close + delim.len()..].to_string();
        }
    }
    content.to_string()
}
498
499impl Plugin for LlmPlugin {
500 fn name(&self) -> &'static str {
501 "llm"
502 }
503
504 fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
505 if !ctx.site_dir.exists() {
506 return Ok(());
507 }
508
509 if !is_ollama_available(&self.config.endpoint) {
511 log::warn!(
512 "[llm] Ollama not reachable at {}, skipping AI augmentation",
513 self.config.endpoint
514 );
515 return Ok(());
516 }
517
518 let html_files = ctx.get_html_files();
519 let mut augmented = 0usize;
520
521 for path in &html_files {
522 let html = fs::read_to_string(path)?;
523 let mut modified = html.clone();
524
525 if needs_meta_description(&modified) {
527 if let Some(desc) = generate_meta_description(
528 &modified,
529 &self.config.model,
530 &self.config.endpoint,
531 self.config.target_grade,
532 self.config.max_refinement_attempts,
533 ) {
534 let audit = ReadabilityAudit::analyze(&desc);
535 if self.config.dry_run {
536 let rel = path
537 .strip_prefix(&ctx.site_dir)
538 .unwrap_or(path)
539 .display();
540 log::info!(
541 "[llm] [dry-run] {rel}: description = {desc}"
542 );
543 log::info!(
544 "[llm] [dry-run] {rel}: grade={:.1}, ease={:.1}, avg_sentence={:.1}",
545 audit.grade_level, audit.reading_ease, audit.avg_sentence_len
546 );
547 } else {
548 modified = inject_meta_description(&modified, &desc);
549 modified = inject_jsonld_description(&modified, &desc);
551 }
552 }
553 }
554
555 let alt_count = generate_missing_alt_text(
557 &mut modified,
558 &self.config.model,
559 &self.config.endpoint,
560 self.config.dry_run,
561 path,
562 &ctx.site_dir,
563 );
564
565 if !self.config.dry_run && modified != html {
566 fs::write(path, &modified)?;
567 augmented += 1;
568 }
569
570 if alt_count > 0 {
571 augmented += 1;
572 }
573 }
574
575 if augmented > 0 {
576 log::info!(
577 "[llm] Augmented {augmented} page(s) with model '{}'",
578 self.config.model
579 );
580 }
581
582 Ok(())
583 }
584}
585
/// Probes `endpoint` with `curl` (two-second limit) and reports whether the
/// request succeeded. A missing `curl` binary counts as unavailable.
fn is_ollama_available(endpoint: &str) -> bool {
    let probe = Command::new("curl")
        .arg("-sf")
        .arg("--max-time")
        .arg("2")
        .arg(endpoint)
        .output();

    match probe {
        Ok(finished) => finished.status.success(),
        Err(_) => false,
    }
}
594
/// Reports whether `html` lacks a usable meta description.
///
/// Returns `true` when no `name="description"` attribute exists, or when the
/// first `content="…"` value found after it is shorter than 50 bytes. When
/// the attribute exists but no complete `content` value follows it, the
/// result is `false`.
fn needs_meta_description(html: &str) -> bool {
    const MARKER: &str = "name=\"description\"";
    const CONTENT: &str = "content=\"";

    let Some(marker_at) = html.find(MARKER) else {
        return true;
    };

    let tail = &html[marker_at..];
    let Some(value_at) = tail.find(CONTENT) else {
        return false;
    };

    let value = &tail[value_at + CONTENT.len()..];
    match value.find('"') {
        Some(len) => len < 50,
        None => false,
    }
}
609
610fn generate_meta_description(
612 html: &str,
613 model: &str,
614 endpoint: &str,
615 target_grade: f64,
616 max_attempts: usize,
617) -> Option<String> {
618 let text = extract_page_text(html, 500);
619 if text.len() < 20 {
620 return None;
621 }
622
623 let prompt = format!(
624 "Write a concise SEO meta description (120-155 characters) for this page content. \
625 Use simple words and short sentences. \
626 Return ONLY the description text, no quotes or explanation:\n\n{text}"
627 );
628
629 generate_with_refinement(
630 endpoint,
631 model,
632 &prompt,
633 target_grade,
634 max_attempts,
635 )
636}
637
/// Inserts a `<meta name="description">` tag just before `</head>`.
///
/// `&`, `"` and `<` in `description` are replaced by HTML entities so the
/// text cannot break out of the attribute value. `&` is escaped first so the
/// entities produced by the later replacements are left intact. When `html`
/// has no `</head>`, it is returned unchanged.
fn inject_meta_description(html: &str, description: &str) -> String {
    let escaped = description
        .replace('&', "&amp;")
        .replace('"', "&quot;")
        .replace('<', "&lt;");
    let tag = format!("<meta name=\"description\" content=\"{escaped}\">\n");

    if let Some(pos) = html.find("</head>") {
        let mut result = html.to_string();
        result.insert_str(pos, &tag);
        result
    } else {
        html.to_string()
    }
}
654
655fn generate_missing_alt_text(
657 html: &mut String,
658 model: &str,
659 endpoint: &str,
660 dry_run: bool,
661 path: &Path,
662 site_dir: &Path,
663) -> usize {
664 let mut count = 0;
665 let mut search_from = 0;
666
667 while let Some(start) = html[search_from..].find("<img") {
668 let abs_start = search_from + start;
669 let Some(tag_end) = html[abs_start..].find('>') else {
670 break;
671 };
672 let tag_end_abs = abs_start + tag_end + 1;
673 let tag = &html[abs_start..tag_end_abs];
674
675 if !tag.contains("alt=") || tag.contains("alt=\"\"") {
676 let src = extract_attr(tag, "src").unwrap_or_default();
678 let prompt = format!(
679 "Describe this image for an alt text attribute. The image file is named '{}'. \
680 Return ONLY the alt text (max 125 characters), no quotes:\n",
681 src
682 );
683
684 if let Some(alt) = call_ollama(endpoint, model, &prompt) {
685 let alt = alt.trim().replace('"', """);
686 if dry_run {
687 let rel =
688 path.strip_prefix(site_dir).unwrap_or(path).display();
689 log::info!(
690 "[llm] [dry-run] {rel}: alt=\"{alt}\" for {src}"
691 );
692 } else {
693 let new_tag = if tag.contains("alt=\"\"") {
695 tag.replace("alt=\"\"", &format!("alt=\"{alt}\""))
696 } else {
697 tag.replace("<img", &format!("<img alt=\"{alt}\""))
698 };
699 html.replace_range(abs_start..tag_end_abs, &new_tag);
700 }
701 count += 1;
702 }
703 }
704
705 search_from = tag_end_abs;
706 }
707
708 count
709}
710
/// Extracts the visible text of `html`, limited to about `max_chars` bytes.
///
/// Scanning starts at the first `<main`, else the first `<body`, else the
/// beginning. Everything between `<` and `>` is dropped. Tag boundaries and
/// any whitespace (newlines and tabs included) act as word separators, so
/// text from adjacent elements or lines is not glued together. Runs of
/// separators collapse to a single space and other control characters are
/// discarded.
fn extract_page_text(html: &str, max_chars: usize) -> String {
    // Appends one separating space, never at the start and never twice in a
    // row, so whitespace does not eat into the size budget.
    fn push_separator(text: &mut String) {
        if !text.is_empty() && !text.ends_with(' ') {
            text.push(' ');
        }
    }

    let body_start = html
        .find("<main")
        .or_else(|| html.find("<body"))
        .unwrap_or(0);

    let mut text = String::with_capacity(max_chars + 50);
    let mut in_tag = false;

    for ch in html[body_start..].chars() {
        if text.len() >= max_chars {
            break;
        }
        match ch {
            '<' => in_tag = true,
            '>' => {
                in_tag = false;
                push_separator(&mut text);
            }
            _ if in_tag => {}
            _ if ch.is_whitespace() => push_separator(&mut text),
            _ if ch.is_control() => {}
            _ => text.push(ch),
        }
    }

    text.split_whitespace().collect::<Vec<_>>().join(" ")
}
735
/// Returns the value of the first `attr="…"` occurrence in `tag`.
///
/// Only double-quoted values are recognised; `None` is returned when the
/// attribute or its closing quote is missing.
fn extract_attr(tag: &str, attr: &str) -> Option<String> {
    let needle = format!("{attr}=\"");
    let (_, after) = tag.split_once(needle.as_str())?;
    let (value, _) = after.split_once('"')?;
    Some(value.to_owned())
}
743
/// Readability formula used to score a text, chosen by language.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum ReadabilityFormula {
    /// Selected for `en`.
    FleschKincaid,
    /// Selected for `fr`.
    KandelMoles,
    /// Selected for `de`.
    WienerSachtextformel,
    /// Selected for `it`.
    Gulpease,
    /// Selected for `sv`, `nb`, `nn`, `da` and `no`.
    Lix,
    /// Selected for `es`.
    FernandezHuerta,
}

impl ReadabilityFormula {
    /// Picks the formula for a language tag such as `en`, `fr-CA` or `pt_BR`.
    ///
    /// Only the primary subtag (the part before the first `-` or `_`) is
    /// considered, case-insensitively. Unsupported languages yield `None`.
    #[must_use]
    pub fn from_lang(lang: &str) -> Option<Self> {
        let primary = lang
            .split(|c: char| c == '-' || c == '_')
            .next()
            .unwrap_or(lang)
            .to_lowercase();

        let formula = match primary.as_str() {
            "en" => Self::FleschKincaid,
            "fr" => Self::KandelMoles,
            "de" => Self::WienerSachtextformel,
            "it" => Self::Gulpease,
            "sv" | "nb" | "nn" | "da" | "no" => Self::Lix,
            "es" => Self::FernandezHuerta,
            _ => return None,
        };
        Some(formula)
    }
}
788
/// Readability scores computed for one piece of text.
#[derive(Clone, Copy, Debug)]
pub struct ReadabilityAudit {
    /// Estimated grade level; never negative.
    pub grade_level: f64,
    /// Reading-ease score in the range 0–100.
    pub reading_ease: f64,
    /// Average number of words per sentence.
    pub avg_sentence_len: f64,
}
799
800impl ReadabilityAudit {
801 #[must_use]
803 pub fn analyze(text: &str) -> Self {
804 let words = count_words(text);
805 let sentences = count_sentences(text);
806 let syllables = count_syllables(text);
807
808 if words == 0 || sentences == 0 {
809 return Self {
810 grade_level: 0.0,
811 reading_ease: 100.0,
812 avg_sentence_len: 0.0,
813 };
814 }
815
816 let wps = words as f64 / sentences as f64;
817 let spw = syllables as f64 / words as f64;
818
819 let grade = 0.39f64.mul_add(wps, 11.8f64.mul_add(spw, -15.59));
820 let ease = (-1.015f64).mul_add(wps, (-84.6f64).mul_add(spw, 206.835));
821
822 Self {
823 grade_level: grade.max(0.0),
824 reading_ease: ease.clamp(0.0, 100.0),
825 avg_sentence_len: wps,
826 }
827 }
828
829 #[must_use]
833 pub fn analyze_with_lang(text: &str, lang: &str) -> Self {
834 let formula = if lang.is_empty() {
835 ReadabilityFormula::FleschKincaid
836 } else {
837 ReadabilityFormula::from_lang(lang)
838 .unwrap_or(ReadabilityFormula::FleschKincaid)
839 };
840
841 let words = count_words(text);
842 let sentences = count_sentences(text);
843 let syllables = count_syllables(text);
844 let chars: usize = text.chars().filter(|c| c.is_alphanumeric()).count();
845
846 if words == 0 || sentences == 0 {
847 return Self {
848 grade_level: 0.0,
849 reading_ease: 100.0,
850 avg_sentence_len: 0.0,
851 };
852 }
853
854 let wps = words as f64 / sentences as f64;
855 let spw = syllables as f64 / words as f64;
856
857 match formula {
858 ReadabilityFormula::FleschKincaid => Self::analyze(text),
859
860 ReadabilityFormula::KandelMoles => {
861 let ease = 68.0f64.mul_add(-spw, 1.15f64.mul_add(-wps, 209.0));
863 Self {
864 grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
865 .max(0.0),
866 reading_ease: ease.clamp(0.0, 100.0),
867 avg_sentence_len: wps,
868 }
869 }
870
871 ReadabilityFormula::WienerSachtextformel => {
872 let word_list: Vec<&str> = text.split_whitespace().collect();
874 let total = word_list.len().max(1) as f64;
875 let pct_3plus_syl = word_list
876 .iter()
877 .filter(|w| count_word_syllables(w) >= 3)
878 .count() as f64
879 / total
880 * 100.0;
881 let pct_6plus_char = word_list
882 .iter()
883 .filter(|w| {
884 w.chars().filter(|c| c.is_alphabetic()).count() > 6
885 })
886 .count() as f64
887 / total
888 * 100.0;
889 let pct_1syl = word_list
890 .iter()
891 .filter(|w| count_word_syllables(w) == 1)
892 .count() as f64
893 / total
894 * 100.0;
895
896 let grade = 0.1935f64.mul_add(
897 pct_3plus_syl,
898 0.1672f64.mul_add(
899 wps,
900 (-0.1297f64).mul_add(
901 pct_6plus_char,
902 (-0.0327f64).mul_add(pct_1syl, -0.875),
903 ),
904 ),
905 );
906
907 Self {
908 grade_level: grade.max(0.0),
909 reading_ease: grade
910 .clamp(0.0, 20.0)
911 .mul_add(-5.0, 100.0)
912 .clamp(0.0, 100.0),
913 avg_sentence_len: wps,
914 }
915 }
916
917 ReadabilityFormula::Gulpease => {
918 let ease = 89.0
920 + 10.0f64
921 .mul_add(-(chars as f64), 300.0 * sentences as f64)
922 / words as f64;
923 Self {
924 grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
925 .max(0.0),
926 reading_ease: ease.clamp(0.0, 100.0),
927 avg_sentence_len: wps,
928 }
929 }
930
931 ReadabilityFormula::Lix => {
932 let word_list: Vec<&str> = text.split_whitespace().collect();
934 let total = word_list.len().max(1) as f64;
935 let long_words = word_list
936 .iter()
937 .filter(|w| {
938 w.chars().filter(|c| c.is_alphabetic()).count() > 6
939 })
940 .count() as f64;
941 let lix = wps + 100.0 * long_words / total;
942 Self {
945 grade_level: (lix / 5.0).max(0.0),
946 reading_ease: (100.0 - lix).clamp(0.0, 100.0),
947 avg_sentence_len: wps,
948 }
949 }
950
951 ReadabilityFormula::FernandezHuerta => {
952 let ease =
954 1.02f64.mul_add(-wps, (-60.0f64).mul_add(spw, 206.84));
955 Self {
956 grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
957 .max(0.0),
958 reading_ease: ease.clamp(0.0, 100.0),
959 avg_sentence_len: wps,
960 }
961 }
962 }
963 }
964}
965
/// Counts the whitespace-separated words in `text`.
fn count_words(text: &str) -> usize {
    let mut total = 0;
    for _word in text.split_whitespace() {
        total += 1;
    }
    total
}
970
/// Counts sentence terminators (`.`, `!`, `?`) in `text`.
///
/// Every terminator character counts, and the result is never below 1.
fn count_sentences(text: &str) -> usize {
    let terminators = text
        .chars()
        .filter(|c| matches!(c, '.' | '!' | '?'))
        .count();

    if terminators == 0 {
        1
    } else {
        terminators
    }
}
978
979fn count_syllables(text: &str) -> usize {
984 text.split_whitespace()
985 .map(|word| count_word_syllables(word))
986 .sum()
987}
988
/// Estimates the number of syllables in `word`.
///
/// Each run of consecutive vowels counts as one syllable. A trailing `e` is
/// treated as silent when the word has more than two letters and more than
/// one vowel group. Non-alphabetic characters are ignored and the result is
/// never below 1.
///
/// Vowels are matched as characters, not truncated bytes, so a non-ASCII
/// consonant such as `š` is not mistaken for an ASCII vowel. Common accented
/// Latin vowels are counted as vowels.
fn count_word_syllables(word: &str) -> usize {
    fn is_vowel(ch: char) -> bool {
        matches!(
            ch,
            'a' | 'e' | 'i' | 'o' | 'u' | 'y'
                | 'à' | 'á' | 'â' | 'ã' | 'ä' | 'å' | 'æ'
                | 'è' | 'é' | 'ê' | 'ë'
                | 'ì' | 'í' | 'î' | 'ï'
                | 'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ø' | 'œ'
                | 'ù' | 'ú' | 'û' | 'ü'
                | 'ý' | 'ÿ'
        )
    }

    let letters: Vec<char> = word
        .to_lowercase()
        .chars()
        .filter(|c| c.is_alphabetic())
        .collect();
    if letters.is_empty() {
        return 1;
    }

    let mut count = 0usize;
    let mut prev_vowel = false;
    for &ch in &letters {
        let vowel = is_vowel(ch);
        if vowel && !prev_vowel {
            count += 1;
        }
        prev_vowel = vowel;
    }

    // Silent trailing "e", as in "make".
    if letters.len() > 2 && letters.last() == Some(&'e') && count > 1 {
        count -= 1;
    }

    count.max(1)
}
1016
1017fn generate_with_refinement(
1022 endpoint: &str,
1023 model: &str,
1024 prompt: &str,
1025 target_grade: f64,
1026 max_attempts: usize,
1027) -> Option<String> {
1028 let mut text = call_ollama(endpoint, model, prompt)?;
1029 let mut audit = ReadabilityAudit::analyze(&text);
1030
1031 for attempt in 0..max_attempts {
1032 if audit.grade_level <= target_grade {
1033 break;
1034 }
1035
1036 log::info!(
1037 "[llm] Grade {:.1} exceeds target {:.1}, refining (attempt {})",
1038 audit.grade_level,
1039 target_grade,
1040 attempt + 1
1041 );
1042
1043 let simplify_prompt = format!(
1044 "Rewrite this text at a 6th-grade reading level. \
1045 Use short sentences (max 20 words). Use simple words. \
1046 Keep all facts and numbers exactly the same. \
1047 Return ONLY the rewritten text:\n\n{text}"
1048 );
1049
1050 if let Some(refined) = call_ollama(endpoint, model, &simplify_prompt) {
1051 let refined_audit = ReadabilityAudit::analyze(&refined);
1052 if refined_audit.grade_level < audit.grade_level {
1053 text = refined;
1054 audit = refined_audit;
1055 }
1056 }
1057 }
1058
1059 Some(text)
1060}
1061
1062fn inject_jsonld_description(html: &str, description: &str) -> String {
1071 if html.contains("\"@type\":\"Article\"")
1073 && html.contains("\"description\"")
1074 {
1075 return html.to_string();
1076 }
1077
1078 let jsonld = serde_json::json!({
1079 "@context": "https://schema.org",
1080 "@type": "Article",
1081 "description": description,
1082 });
1083
1084 let script =
1085 format!("<script type=\"application/ld+json\">{}</script>\n", jsonld);
1086
1087 if let Some(pos) = html.find("</head>") {
1088 let mut result = html.to_string();
1089 result.insert_str(pos, &script);
1090 result
1091 } else {
1092 html.to_string()
1093 }
1094}
1095
1096fn call_ollama(endpoint: &str, model: &str, prompt: &str) -> Option<String> {
1098 let url = format!("{}/api/generate", endpoint.trim_end_matches('/'));
1099 let payload = serde_json::json!({
1100 "model": model,
1101 "prompt": prompt,
1102 "stream": false,
1103 });
1104
1105 let output = Command::new("curl")
1106 .args([
1107 "-sf",
1108 "--max-time",
1109 "30",
1110 "-X",
1111 "POST",
1112 &url,
1113 "-H",
1114 "Content-Type: application/json",
1115 "-d",
1116 &payload.to_string(),
1117 ])
1118 .output()
1119 .ok()?;
1120
1121 if !output.status.success() {
1122 return None;
1123 }
1124
1125 let response: serde_json::Value =
1126 serde_json::from_slice(&output.stdout).ok()?;
1127 response
1128 .get("response")
1129 .and_then(|v| v.as_str())
1130 .map(|s| s.trim().to_string())
1131 .filter(|s| !s.is_empty())
1132}
1133
1134#[cfg(test)]
1135#[allow(clippy::unwrap_used, clippy::expect_used)]
1136mod tests {
1137 use super::*;
1138
1139 #[test]
1140 fn needs_meta_description_missing() {
1141 assert!(needs_meta_description("<html><head></head></html>"));
1142 }
1143
1144 #[test]
1145 fn needs_meta_description_short() {
1146 let html = r#"<html><head><meta name="description" content="Short"></head></html>"#;
1147 assert!(needs_meta_description(html));
1148 }
1149
1150 #[test]
1151 fn needs_meta_description_adequate() {
1152 let html = r#"<html><head><meta name="description" content="This is a sufficiently long meta description that exceeds fifty characters easily"></head></html>"#;
1153 assert!(!needs_meta_description(html));
1154 }
1155
1156 #[test]
1157 fn inject_meta_description_into_head() {
1158 let html = "<html><head><title>T</title></head><body></body></html>";
1159 let result = inject_meta_description(html, "Test description");
1160 assert!(result.contains("name=\"description\""));
1161 assert!(result.contains("Test description"));
1162 }
1163
1164 #[test]
1165 fn extract_attr_basic() {
1166 assert_eq!(
1167 extract_attr(r#"<img src="photo.jpg" alt="x">"#, "src"),
1168 Some("photo.jpg".to_string())
1169 );
1170 }
1171
1172 #[test]
1173 fn extract_attr_missing() {
1174 assert_eq!(extract_attr(r#"<img src="x.jpg">"#, "alt"), None);
1175 }
1176
1177 #[test]
1178 fn extract_page_text_strips_tags() {
1179 let html = "<body><p>Hello <b>world</b></p></body>";
1180 let text = extract_page_text(html, 100);
1181 assert_eq!(text, "Hello world");
1182 }
1183
1184 #[test]
1185 fn llm_plugin_name() {
1186 let plugin = LlmPlugin::new(LlmConfig::default());
1187 assert_eq!(plugin.name(), "llm");
1188 }
1189
1190 #[test]
1193 fn flesch_kincaid_simple_text() {
1194 let audit = ReadabilityAudit::analyze("The cat sat on the mat.");
1196 assert!(
1197 audit.grade_level < 4.0,
1198 "Simple text should be below grade 4, got {:.1}",
1199 audit.grade_level
1200 );
1201 assert!(audit.reading_ease > 80.0);
1202 }
1203
1204 #[test]
1205 fn flesch_kincaid_complex_text() {
1206 let text = "The implementation of sophisticated cryptographic \
1207 algorithms necessitates comprehensive understanding \
1208 of mathematical foundations. Asymmetric encryption \
1209 protocols demonstrate considerable computational \
1210 overhead compared to symmetric alternatives.";
1211 let audit = ReadabilityAudit::analyze(text);
1212 assert!(
1213 audit.grade_level > 12.0,
1214 "Complex text should be above grade 12, got {:.1}",
1215 audit.grade_level
1216 );
1217 }
1218
1219 #[test]
1220 fn flesch_kincaid_empty_text() {
1221 let audit = ReadabilityAudit::analyze("");
1222 assert!(audit.grade_level.abs() < f64::EPSILON);
1223 assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
1224 }
1225
1226 #[test]
1227 fn syllable_count_known_words() {
1228 assert_eq!(count_word_syllables("cat"), 1);
1229 assert_eq!(count_word_syllables("hello"), 2);
1230 assert_eq!(count_word_syllables("beautiful"), 3);
1231 assert_eq!(count_word_syllables("implementation"), 5);
1232 }
1233
1234 #[test]
1235 fn count_sentences_basic() {
1236 assert_eq!(count_sentences("Hello. World!"), 2);
1237 assert_eq!(count_sentences("One sentence"), 1); assert_eq!(count_sentences("A? B? C!"), 3);
1239 }
1240
1241 #[test]
1244 fn inject_jsonld_adds_article_block() {
1245 let html = "<html><head><title>T</title></head><body></body></html>";
1246 let result = inject_jsonld_description(html, "Test desc");
1247 assert!(result.contains("application/ld+json"));
1248 assert!(result.contains("\"@type\":\"Article\""));
1249 assert!(result.contains("Test desc"));
1250 }
1251
1252 #[test]
1253 fn inject_jsonld_skips_existing() {
1254 let html = r#"<html><head><script type="application/ld+json">{"@type":"Article","description":"Existing"}</script></head></html>"#;
1255 let result = inject_jsonld_description(html, "New desc");
1256 assert!(!result.contains("New desc"));
1257 assert!(result.contains("Existing"));
1258 }
1259
1260 #[test]
1263 fn audit_all_scans_markdown_files() {
1264 let dir = tempfile::tempdir().unwrap();
1265 let content = dir.path().join("content");
1266 fs::create_dir_all(&content).unwrap();
1267
1268 fs::write(
1269 content.join("simple.md"),
1270 "---\ntitle: Simple\n---\nThe cat sat on the mat. It was a good day.",
1271 )
1272 .unwrap();
1273 fs::write(
1274 content.join("complex.md"),
1275 "---\ntitle: Complex\n---\n\
1276 The implementation of sophisticated cryptographic algorithms \
1277 necessitates comprehensive understanding of mathematical \
1278 foundations and computational complexity theory.",
1279 )
1280 .unwrap();
1281
1282 let report = LlmPlugin::audit_all(&content, 8.0).unwrap();
1283 assert_eq!(report.total_files, 2);
1284 assert!(report.failing > 0, "complex.md should fail grade 8");
1285 }
1286
1287 #[test]
1288 fn audit_all_empty_dir() {
1289 let dir = tempfile::tempdir().unwrap();
1290 let content = dir.path().join("empty");
1291 fs::create_dir_all(&content).unwrap();
1292
1293 let report = LlmPlugin::audit_all(&content, 8.0).unwrap();
1294 assert_eq!(report.total_files, 0);
1295 assert_eq!(report.failing, 0);
1296 }
1297
1298 #[test]
1299 fn strip_frontmatter_yaml() {
1300 let input = "---\ntitle: Hello\n---\nBody text here.";
1301 let body = strip_frontmatter(input);
1302 assert!(body.contains("Body text here"));
1303 assert!(!body.contains("title:"));
1304 }
1305
1306 #[test]
1307 fn strip_frontmatter_toml() {
1308 let input = "+++\ntitle = \"Hello\"\n+++\nBody text here.";
1309 let body = strip_frontmatter(input);
1310 assert!(body.contains("Body text here"));
1311 assert!(!body.contains("title"));
1312 }
1313
1314 #[test]
1315 fn strip_frontmatter_none() {
1316 let input = "Just plain content.";
1317 assert_eq!(strip_frontmatter(input), input);
1318 }
1319
1320 #[test]
1321 fn split_frontmatter_preserves_delimiters() {
1322 let input = "---\ntitle: Hello\ndate: 2026-01-01\n---\n\n# Body text";
1323 let (fm, body) = split_frontmatter(input);
1324 assert!(fm.starts_with("---"));
1325 assert!(fm.ends_with("---"));
1326 assert!(fm.contains("title: Hello"));
1327 assert!(body.contains("# Body text"));
1328 }
1329
1330 #[test]
1331 fn split_frontmatter_toml_preserves() {
1332 let input = "+++\ntitle = \"Hello\"\n+++\nBody.";
1333 let (fm, body) = split_frontmatter(input);
1334 assert!(fm.starts_with("+++"));
1335 assert!(body.contains("Body."));
1336 }
1337
1338 #[test]
1339 fn split_frontmatter_no_frontmatter() {
1340 let input = "Just plain content.";
1341 let (fm, body) = split_frontmatter(input);
1342 assert!(fm.is_empty());
1343 assert_eq!(body, input);
1344 }
1345
1346 #[test]
1347 fn audit_and_fix_skips_when_ollama_unavailable() {
1348 let dir = tempfile::tempdir().unwrap();
1349 let content = dir.path().join("content");
1350 fs::create_dir_all(&content).unwrap();
1351 fs::write(content.join("test.md"), "---\ntitle: T\n---\nSimple text.")
1352 .unwrap();
1353
1354 let config = LlmConfig {
1355 endpoint: "http://localhost:99999".to_string(),
1356 ..LlmConfig::default()
1357 };
1358 let result = LlmPlugin::audit_and_fix(&content, &config).unwrap();
1359 assert_eq!(result, 0);
1360 }
1361
/// Prints a per-directory readability report for the repository's known content trees.
///
/// Informational only: directories that do not exist relative to the working
/// directory, and audits that report zero files, are skipped. No pass rate is
/// asserted; the test fails only if `audit_all` returns an error.
#[test]
fn full_repo_readability_audit() {
    // (content directory, target grade) pairs.
    let dirs = [
        ("docs/guide", 15.0),
        ("examples/basic/content", 10.0),
        ("examples/blog/content", 10.0),
        ("examples/docs/content", 13.0),
        ("examples/landing/content", 10.0),
        ("examples/plugins/content", 10.0),
        ("examples/portfolio/content", 10.0),
        ("examples/quickstart/content", 10.0),
        ("examples/content/en", 10.0),
    ];

    // Build the banner rule once rather than once per `println!`.
    let rule = "=".repeat(60);

    let mut total_files = 0usize;
    let mut total_pass = 0usize;
    let mut total_fail = 0usize;

    println!("\n{rule}");
    println!(" FULL REPOSITORY READABILITY AUDIT");
    println!("{rule}\n");

    for (dir, target) in &dirs {
        let path = Path::new(dir);
        if !path.exists() {
            continue;
        }

        let report = LlmPlugin::audit_all(path, *target).unwrap();
        if report.total_files == 0 {
            continue;
        }

        // Section header; the trailing rule mirrors the leading one.
        println!("── {dir} (target: grade {target:.0}) ──");
        for r in &report.results {
            let status = if r.passes { "PASS" } else { "FAIL" };
            println!(
                " {:.<40} grade {:>5.1} ease {:>5.1} [{status}]",
                r.path, r.grade_level, r.reading_ease
            );
        }
        println!(" → {}/{} pass\n", report.passing, report.total_files);

        total_files += report.total_files;
        total_pass += report.passing;
        total_fail += report.failing;
    }

    println!("{rule}");
    println!(
        " TOTAL: {total_files} files — {total_pass} pass, {total_fail} fail"
    );
    println!("{rule}\n");
}
1417
/// Prints the readability audit of `docs/guide` (target grade 17) when that directory exists.
#[test]
fn audit_docs_guide() {
    let guide_dir = Path::new("docs/guide");
    if guide_dir.exists() {
        let report = LlmPlugin::audit_all(guide_dir, 17.0).unwrap();
        for entry in &report.results {
            println!(
                "[readability] {}: grade={:.1}, ease={:.1}, avg_sentence={:.1} — {}",
                entry.path,
                entry.grade_level,
                entry.reading_ease,
                entry.avg_sentence_len,
                if entry.passes { "PASS" } else { "FAIL" }
            );
        }

        println!(
            "\n[readability] {}/{} files pass (target: grade {:.0})",
            report.passing, report.total_files, report.target_grade
        );
    }
}
1447
/// An endpoint that cannot be reached is reported as unavailable.
#[test]
fn is_ollama_available_unreachable() {
    let reachable = is_ollama_available("http://localhost:99999");
    assert!(!reachable);
}
1454
/// Calling an unreachable endpoint yields `None`.
#[test]
fn call_ollama_unreachable_returns_none() {
    let reply = call_ollama("http://localhost:99999", "llama3", "hi");
    assert!(reply.is_none());
}
1459
/// A long-enough description is recognised even when `content` precedes `name`.
#[test]
fn needs_meta_description_with_content_attr_first() {
    let missing = needs_meta_description(
        r#"<meta content="Decent length description that is more than fifty characters long enough" name="description">"#,
    );
    assert!(!missing);
}
1467
/// A document without `<head>` is returned unchanged.
#[test]
fn inject_meta_description_no_head() {
    let page = "<html><body>No head tag</body></html>";
    assert_eq!(inject_meta_description(page, "desc"), page);
}
1474
/// JSON-LD injection leaves a document without `<head>` unchanged.
#[test]
fn inject_jsonld_no_head() {
    let page = "<html><body>No head</body></html>";
    assert_eq!(inject_jsonld_description(page, "desc"), page);
}
1481
/// Tag-free input shorter than the limit is returned verbatim.
#[test]
fn extract_page_text_no_body() {
    let plain = "just plain text no tags";
    let extracted = extract_page_text(plain, 100);
    assert_eq!(extracted, "just plain text no tags");
}
1488
/// With a limit of 3 the extracted text stays within 5 bytes.
#[test]
fn extract_page_text_truncates() {
    let extracted = extract_page_text("<body><p>word </p></body>", 3);
    let length = extracted.len();
    assert!(length <= 5);
}
1495
/// A page without `<img>` tags produces a count of zero.
#[test]
fn generate_missing_alt_text_no_images() {
    let mut markup = String::from("<html><body><p>No images</p></body></html>");
    let page = Path::new("test.html");
    let site_root = Path::new(".");
    let generated = generate_missing_alt_text(
        &mut markup,
        "llama3",
        "http://localhost:99999",
        true,
        page,
        site_root,
    );
    assert_eq!(generated, 0);
}
1509
/// A one-word text yields non-negative grade and sentence-length metrics.
#[test]
fn readability_audit_single_word() {
    let single = ReadabilityAudit::analyze("Hello");
    for metric in [single.grade_level, single.avg_sentence_len] {
        assert!(metric >= 0.0);
    }
}
1516
/// The empty word counts as one syllable.
#[test]
fn count_word_syllables_empty() {
    let syllables = count_word_syllables("");
    assert_eq!(syllables, 1);
}
1521
/// A digits-only token counts as one syllable.
#[test]
fn count_word_syllables_numbers() {
    let syllables = count_word_syllables("123");
    assert_eq!(syllables, 1);
}
1526
/// A `---` header with no closing delimiter is treated as plain body text.
#[test]
fn split_frontmatter_unclosed() {
    let doc = "---\ntitle: Hello\nNo closing delimiter";
    let parts = split_frontmatter(doc);
    assert!(parts.0.is_empty());
    assert_eq!(parts.1, doc);
}
1534
/// `after_compile` returns `Ok` when the site directory does not exist.
#[test]
fn llm_plugin_skips_missing_site_dir() {
    let plugin = LlmPlugin::new(LlmConfig::default());
    let content = Path::new("/tmp/c");
    let build = Path::new("/tmp/b");
    let site = Path::new("/nonexistent/site");
    let templates = Path::new("/tmp/t");
    let ctx = PluginContext::new(content, build, site, templates);
    let outcome = plugin.after_compile(&ctx);
    assert!(outcome.is_ok());
}
1546
/// Default config targets grade 8.0 with a single refinement attempt.
#[test]
fn config_defaults_readability() {
    let LlmConfig {
        target_grade,
        max_refinement_attempts,
        ..
    } = LlmConfig::default();
    let delta = (target_grade - 8.0).abs();
    assert!(delta < f64::EPSILON);
    assert_eq!(max_refinement_attempts, 1);
}
1553
/// `after_compile` succeeds on a real site directory when the endpoint is unreachable.
#[test]
fn llm_plugin_skips_when_ollama_unavailable() {
    let unreachable = LlmConfig {
        endpoint: String::from("http://localhost:99999"),
        ..LlmConfig::default()
    };
    let plugin = LlmPlugin::new(unreachable);

    // Keep the temp dir handle alive for the whole test.
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let site = root.join("site");
    fs::create_dir_all(&site).unwrap();
    let index = site.join("index.html");
    fs::write(index, "<html><body></body></html>").unwrap();

    let ctx = PluginContext::new(root, root, &site, root);
    plugin.after_compile(&ctx).unwrap();
}
1571
/// An `AiFixReport` serializes with its counters and per-file actions.
#[test]
fn ai_fix_report_serializes_to_json() {
    let rewritten = AiFixResult {
        path: String::from("docs/guide.md"),
        before_grade: 12.5,
        after_grade: 7.2,
        improved: true,
        action: String::from("rewritten"),
    };
    let unchanged = AiFixResult {
        path: String::from("docs/api.md"),
        before_grade: 14.0,
        after_grade: 13.8,
        improved: false,
        action: String::from("no-improvement"),
    };
    let report = AiFixReport {
        total_audited: 10,
        total_failing: 3,
        total_fixed: 2,
        results: vec![rewritten, unchanged],
    };

    let encoded = serde_json::to_string(&report).unwrap();
    for needle in ["\"total_fixed\":2", "\"action\":\"rewritten\""] {
        assert!(encoded.contains(needle));
    }
}
1601
/// With an unreachable endpoint the fix report has no fixes and no results.
#[test]
fn ai_fix_report_skips_when_ollama_unavailable() {
    // Keep the temp dir handle alive for the whole test.
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("content");
    fs::create_dir_all(&content_dir).unwrap();
    let page = content_dir.join("test.md");
    let markdown =
        "---\ntitle: T\n---\nThe implementation of sophisticated algorithms.";
    fs::write(page, markdown).unwrap();

    let unreachable = LlmConfig {
        endpoint: String::from("http://localhost:99999"),
        max_refinement_attempts: 3,
        ..LlmConfig::default()
    };
    let outcome =
        LlmPlugin::audit_and_fix_with_report(&content_dir, &unreachable)
            .unwrap();
    assert_eq!(outcome.total_fixed, 0);
    assert!(outcome.results.is_empty());
}
1623
/// English tags, with or without a region, map to Flesch-Kincaid.
#[test]
fn formula_from_lang_english() {
    for tag in ["en", "en-US"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::FleschKincaid)
        );
    }
}
1637
/// French tags, with or without a region, map to Kandel-Moles.
#[test]
fn formula_from_lang_french() {
    for tag in ["fr", "fr-CA"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::KandelMoles)
        );
    }
}
1649
/// German tags, with or without a region, map to the Wiener Sachtextformel.
#[test]
fn formula_from_lang_german() {
    for tag in ["de", "de-AT"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::WienerSachtextformel)
        );
    }
}
1661
/// `it` maps to Gulpease.
#[test]
fn formula_from_lang_italian() {
    let formula = ReadabilityFormula::from_lang("it");
    assert_eq!(formula, Some(ReadabilityFormula::Gulpease));
}
1669
/// `sv`, `nb` and `da` all map to LIX.
#[test]
fn formula_from_lang_swedish() {
    for tag in ["sv", "nb", "da"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::Lix)
        );
    }
}
1685
/// `es` maps to Fernandez-Huerta.
#[test]
fn formula_from_lang_spanish() {
    let formula = ReadabilityFormula::from_lang("es");
    assert_eq!(formula, Some(ReadabilityFormula::FernandezHuerta));
}
1693
/// `ja`, `zh` and `ar` have no formula.
#[test]
fn formula_from_lang_unsupported() {
    for tag in ["ja", "zh", "ar"] {
        assert_eq!(ReadabilityFormula::from_lang(tag), None);
    }
}
1700
/// Short French sentences score a reading ease above 50.
#[test]
fn kandel_moles_simple_french() {
    let ease = ReadabilityAudit::analyze_with_lang(
        "Le chat est sur le tapis. Il fait beau. Le soleil brille.",
        "fr",
    )
    .reading_ease;
    assert!(
        ease > 50.0,
        "Simple French should be readable, got {:.1}",
        ease
    );
}
1711
/// Short German sentences score a grade level below 15.
#[test]
fn wiener_simple_german() {
    let grade = ReadabilityAudit::analyze_with_lang(
        "Die Katze sitzt auf der Matte. Es ist ein guter Tag. Die Sonne scheint.",
        "de",
    )
    .grade_level;
    assert!(grade < 15.0, "Simple German got grade {:.1}", grade);
}
1722
/// Short Italian sentences score a reading ease above 40.
#[test]
fn gulpease_simple_italian() {
    let ease = ReadabilityAudit::analyze_with_lang(
        "Il gatto si siede sul tappeto. Il sole splende. Oggi è una bella giornata.",
        "it",
    )
    .reading_ease;
    assert!(ease > 40.0, "Simple Italian got ease {:.1}", ease);
}
1733
/// Short Swedish sentences give a non-negative grade and a positive reading ease.
#[test]
fn lix_simple_swedish() {
    let swedish = "Katten sitter på mattan. Solen skiner. Det är en fin dag.";
    let scored = ReadabilityAudit::analyze_with_lang(swedish, "sv");
    let (grade, ease) = (scored.grade_level, scored.reading_ease);
    assert!(grade >= 0.0);
    assert!(ease > 0.0);
}
1741
/// Short Spanish sentences score a reading ease above 50.
#[test]
fn fernandez_huerta_simple_spanish() {
    let ease = ReadabilityAudit::analyze_with_lang(
        "El gato está en la mesa. El sol brilla. Es un buen día.",
        "es",
    )
    .reading_ease;
    assert!(ease > 50.0, "Simple Spanish got ease {:.1}", ease);
}
1752
/// An empty language tag gives the same grade as plain `analyze`.
#[test]
fn analyze_with_lang_empty_defaults_to_english() {
    let sample = "The cat sat on the mat.";
    let baseline = ReadabilityAudit::analyze(sample).grade_level;
    let untagged = ReadabilityAudit::analyze_with_lang(sample, "").grade_level;
    let delta = (baseline - untagged).abs();
    assert!(delta < f64::EPSILON);
}
1760
/// An unsupported language tag gives the same grade as plain `analyze`.
#[test]
fn analyze_with_lang_unsupported_falls_back() {
    let sample = "The cat sat on the mat.";
    let baseline = ReadabilityAudit::analyze(sample).grade_level;
    let fallback = ReadabilityAudit::analyze_with_lang(sample, "ja").grade_level;
    let delta = (baseline - fallback).abs();
    assert!(delta < f64::EPSILON);
}
1768
/// The `language:` key is read from a `---` header.
#[test]
fn extract_frontmatter_lang_yaml() {
    let detected =
        extract_frontmatter_lang("---\ntitle: Hello\nlanguage: fr\n---\nBody.");
    assert_eq!(detected, "fr");
}
1774
/// The short `lang:` key is read from a `---` header.
#[test]
fn extract_frontmatter_lang_yaml_short() {
    let detected =
        extract_frontmatter_lang("---\ntitle: Hello\nlang: de\n---\nBody.");
    assert_eq!(detected, "de");
}
1780
/// A quoted `language = "…"` value is read from a `+++` header.
#[test]
fn extract_frontmatter_lang_toml() {
    let detected = extract_frontmatter_lang(
        "+++\ntitle = \"Hello\"\nlanguage = \"it\"\n+++\nBody.",
    );
    assert_eq!(detected, "it");
}
1786
/// A header without a language key yields the empty string.
#[test]
fn extract_frontmatter_lang_missing() {
    let detected = extract_frontmatter_lang("---\ntitle: Hello\n---\nBody.");
    assert_eq!(detected, "");
}
1792
/// Input without any header yields the empty string.
#[test]
fn extract_frontmatter_lang_no_frontmatter() {
    let detected = extract_frontmatter_lang("Just plain text.");
    assert_eq!(detected, "");
}
1798
/// A single French-tagged Markdown file is picked up by `audit_all`.
#[test]
fn audit_all_respects_language() {
    // Keep the temp dir handle alive for the whole test.
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("content");
    fs::create_dir_all(&content_dir).unwrap();

    let french_page = content_dir.join("french.md");
    let markdown = "---\ntitle: Bonjour\nlanguage: fr\n---\nLe chat est sur le tapis. Il fait beau.";
    fs::write(french_page, markdown).unwrap();

    let audited = LlmPlugin::audit_all(&content_dir, 8.0).unwrap();
    assert_eq!(audited.total_files, 1);
}
1815
/// Empty French text: grade 0, ease 100, average sentence length 0.
#[test]
fn kandel_moles_empty_text() {
    let blank = ReadabilityAudit::analyze_with_lang("", "fr");
    let ease_gap = (blank.reading_ease - 100.0).abs();
    assert!(blank.grade_level.abs() < f64::EPSILON);
    assert!(ease_gap < f64::EPSILON);
    assert!(blank.avg_sentence_len.abs() < f64::EPSILON);
}
1825
/// Empty German text: grade 0 and ease 100.
#[test]
fn wiener_empty_text() {
    let blank = ReadabilityAudit::analyze_with_lang("", "de");
    let ease_gap = (blank.reading_ease - 100.0).abs();
    assert!(blank.grade_level.abs() < f64::EPSILON);
    assert!(ease_gap < f64::EPSILON);
}
1832
/// Empty Italian text: grade 0 and ease 100.
#[test]
fn gulpease_empty_text() {
    let blank = ReadabilityAudit::analyze_with_lang("", "it");
    let ease_gap = (blank.reading_ease - 100.0).abs();
    assert!(blank.grade_level.abs() < f64::EPSILON);
    assert!(ease_gap < f64::EPSILON);
}
1839
/// Empty Swedish text: grade 0 and ease 100.
#[test]
fn lix_empty_text() {
    let blank = ReadabilityAudit::analyze_with_lang("", "sv");
    let ease_gap = (blank.reading_ease - 100.0).abs();
    assert!(blank.grade_level.abs() < f64::EPSILON);
    assert!(ease_gap < f64::EPSILON);
}
1846
/// Empty Spanish text: grade 0 and ease 100.
#[test]
fn fernandez_huerta_empty_text() {
    let blank = ReadabilityAudit::analyze_with_lang("", "es");
    let ease_gap = (blank.reading_ease - 100.0).abs();
    assert!(blank.grade_level.abs() < f64::EPSILON);
    assert!(ease_gap < f64::EPSILON);
}
1853
/// One French word: non-negative grade and ease, sentence length at least 1.
#[test]
fn kandel_moles_single_word() {
    let single = ReadabilityAudit::analyze_with_lang("Bonjour", "fr");
    for metric in [single.grade_level, single.reading_ease] {
        assert!(metric >= 0.0);
    }
    assert!(single.avg_sentence_len >= 1.0);
}
1863
/// One German word: non-negative grade, sentence length at least 1.
#[test]
fn wiener_single_word() {
    let single = ReadabilityAudit::analyze_with_lang("Hallo", "de");
    let (grade, sentence_len) = (single.grade_level, single.avg_sentence_len);
    assert!(grade >= 0.0);
    assert!(sentence_len >= 1.0);
}
1870
/// One Italian word: non-negative grade, sentence length at least 1.
#[test]
fn gulpease_single_word() {
    let single = ReadabilityAudit::analyze_with_lang("Ciao", "it");
    let (grade, sentence_len) = (single.grade_level, single.avg_sentence_len);
    assert!(grade >= 0.0);
    assert!(sentence_len >= 1.0);
}
1877
/// One Swedish word yields a non-negative grade.
#[test]
fn lix_single_word() {
    let grade = ReadabilityAudit::analyze_with_lang("Hej", "sv").grade_level;
    assert!(grade >= 0.0);
}
1883
/// One Spanish word yields a non-negative grade.
#[test]
fn fernandez_huerta_single_word() {
    let grade = ReadabilityAudit::analyze_with_lang("Hola", "es").grade_level;
    assert!(grade >= 0.0);
}
1889
/// Dense French prose: positive grade, non-negative ease, sentences longer than one word.
#[test]
fn kandel_moles_long_text() {
    let prose = "Le développement de nouvelles infrastructures \
        technologiques nécessite une compréhension \
        approfondie des systèmes complexes. \
        Les algorithmes sophistiqués démontrent \
        une efficacité considérable. \
        La modernisation progressive des architectures \
        informatiques représente un défi majeur.";
    let scored = ReadabilityAudit::analyze_with_lang(prose, "fr");
    let (grade, ease, sentence_len) = (
        scored.grade_level,
        scored.reading_ease,
        scored.avg_sentence_len,
    );
    assert!(grade > 0.0);
    assert!(ease >= 0.0);
    assert!(sentence_len > 1.0);
}
1906
/// Dense German prose: positive grade, sentences longer than one word.
#[test]
fn wiener_long_text() {
    let prose = "Die Implementierung fortschrittlicher kryptografischer \
        Algorithmen erfordert umfassendes Verständnis \
        mathematischer Grundlagen. Asymmetrische \
        Verschlüsselungsprotokolle weisen erheblichen \
        Rechenaufwand auf. Die systematische Optimierung \
        komplexer Datenstrukturen bleibt herausfordernd.";
    let scored = ReadabilityAudit::analyze_with_lang(prose, "de");
    let (grade, sentence_len) = (scored.grade_level, scored.avg_sentence_len);
    assert!(grade > 0.0);
    assert!(sentence_len > 1.0);
}
1919
/// Dense Italian prose: positive grade, sentences longer than one word.
#[test]
fn gulpease_long_text() {
    let prose = "L'implementazione di algoritmi crittografici sofisticati \
        richiede una comprensione approfondita dei fondamenti \
        matematici. I protocolli di crittografia asimmetrica \
        dimostrano un considerevole sovraccarico computazionale. \
        L'ottimizzazione sistematica delle strutture dati \
        complesse rimane impegnativa.";
    let scored = ReadabilityAudit::analyze_with_lang(prose, "it");
    let (grade, sentence_len) = (scored.grade_level, scored.avg_sentence_len);
    assert!(grade > 0.0);
    assert!(sentence_len > 1.0);
}
1932
/// Dense Swedish prose: positive grade, sentences longer than one word.
#[test]
fn lix_long_text() {
    let prose = "Implementeringen av avancerade kryptografiska algoritmer \
        kräver omfattande förståelse av matematiska grunder. \
        Asymmetriska krypteringsprotokoll uppvisar betydande \
        beräkningsbelastning. Systematisk optimering av komplexa \
        datastrukturer förblir utmanande.";
    let scored = ReadabilityAudit::analyze_with_lang(prose, "sv");
    let (grade, sentence_len) = (scored.grade_level, scored.avg_sentence_len);
    assert!(grade > 0.0);
    assert!(sentence_len > 1.0);
}
1944
/// Dense Spanish prose: positive grade, sentences longer than one word.
#[test]
fn fernandez_huerta_long_text() {
    let prose = "La implementación de algoritmos criptográficos sofisticados \
        requiere una comprensión profunda de los fundamentos \
        matemáticos. Los protocolos de cifrado asimétrico \
        demuestran una considerable sobrecarga computacional. \
        La optimización sistemática de estructuras de datos \
        complejas sigue siendo un desafío.";
    let scored = ReadabilityAudit::analyze_with_lang(prose, "es");
    let (grade, sentence_len) = (scored.grade_level, scored.avg_sentence_len);
    assert!(grade > 0.0);
    assert!(sentence_len > 1.0);
}
1958
/// German text mixing short and long words: grade >= 0 and ease within [0, 100].
#[test]
fn wiener_mixed_syllable_words() {
    let mixed = "Ich bin gut. Das Haus ist sehr interessant. \
        Die Universität hat viele Studenten.";
    let scored = ReadabilityAudit::analyze_with_lang(mixed, "de");
    assert!(scored.grade_level >= 0.0);
    assert!((0.0..=100.0).contains(&scored.reading_ease));
}
1971
/// Swedish text mixing short and long words: grade > 0 and ease within [0, 100].
#[test]
fn lix_mixed_word_lengths() {
    let mixed = "En bok om programmering. \
        Datavetenskapliga beräkningar kräver noggrannhet.";
    let scored = ReadabilityAudit::analyze_with_lang(mixed, "sv");
    assert!(scored.grade_level > 0.0);
    assert!((0.0..=100.0).contains(&scored.reading_ease));
}
1984
/// Quotes around a `+++` header value are stripped, leaving the region tag intact.
#[test]
fn extract_frontmatter_lang_toml_with_quotes() {
    let detected = extract_frontmatter_lang(
        "+++\ntitle = \"Hello\"\nlanguage = \"en-US\"\n+++\nBody.",
    );
    assert_eq!(detected, "en-US");
}
1993
/// When both `language` and `lang` are present, the first listed value is returned.
#[test]
fn extract_frontmatter_lang_first_wins() {
    let detected =
        extract_frontmatter_lang("---\nlanguage: fr\nlang: de\n---\nBody.");
    assert_eq!(detected, "fr");
}
2000
/// Whitespace surrounding the value is trimmed.
#[test]
fn extract_frontmatter_lang_whitespace_around_value() {
    let detected = extract_frontmatter_lang("---\nlanguage: es \n---\nBody.");
    assert_eq!(detected, "es");
}
2006
/// Double quotes around a `---` header value are stripped.
#[test]
fn extract_frontmatter_lang_yaml_quoted_value() {
    let detected = extract_frontmatter_lang("---\nlanguage: \"de\"\n---\nBody.");
    assert_eq!(detected, "de");
}
2012
/// Single quotes around a header value are stripped.
#[test]
fn extract_frontmatter_lang_single_quoted() {
    let detected = extract_frontmatter_lang("---\nlanguage: 'it'\n---\nBody.");
    assert_eq!(detected, "it");
}
2018
/// A `language:` key with no value yields the empty string.
#[test]
fn extract_frontmatter_lang_empty_value() {
    let detected = extract_frontmatter_lang("---\nlanguage: \n---\nBody.");
    assert_eq!(detected, "");
}
2024
/// The short `lang` key is read from a `+++` header.
#[test]
fn extract_frontmatter_lang_toml_lang_key() {
    let detected = extract_frontmatter_lang("+++\nlang = \"sv\"\n+++\nBody.");
    assert_eq!(detected, "sv");
}
2030
/// With a target grade of 20 and an unreachable endpoint, nothing is reported as fixed.
#[test]
fn audit_and_fix_with_report_all_passing() {
    // Keep the temp dir handle alive for the whole test.
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("content");
    fs::create_dir_all(&content_dir).unwrap();

    let page = content_dir.join("simple.md");
    fs::write(page, "---\ntitle: Simple\n---\nThe cat sat. It was good.")
        .unwrap();

    let lenient = LlmConfig {
        endpoint: String::from("http://localhost:99999"),
        target_grade: 20.0,
        ..LlmConfig::default()
    };
    let outcome =
        LlmPlugin::audit_and_fix_with_report(&content_dir, &lenient).unwrap();
    assert_eq!(outcome.total_fixed, 0);
}
2057
/// An empty content directory gives zero audited, zero failing and no results.
#[test]
fn audit_and_fix_with_report_empty_dir() {
    // Keep the temp dir handle alive for the whole test.
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("empty_content");
    fs::create_dir_all(&content_dir).unwrap();

    let unreachable = LlmConfig {
        endpoint: String::from("http://localhost:99999"),
        ..LlmConfig::default()
    };
    let outcome =
        LlmPlugin::audit_and_fix_with_report(&content_dir, &unreachable)
            .unwrap();
    assert_eq!(outcome.total_audited, 0);
    assert_eq!(outcome.total_failing, 0);
    assert!(outcome.results.is_empty());
}
2074
/// A file that is only frontmatter is counted once and passes.
#[test]
fn audit_all_file_with_empty_body() {
    // Keep the temp dir handle alive for the whole test.
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("content");
    fs::create_dir_all(&content_dir).unwrap();

    let page = content_dir.join("empty_body.md");
    fs::write(page, "---\ntitle: T\n---\n").unwrap();

    let audited = LlmPlugin::audit_all(&content_dir, 8.0).unwrap();
    assert_eq!(audited.total_files, 1);
    assert_eq!(audited.passing, 1);
}
2089
/// A description meta tag lacking a `content` attribute is not flagged.
#[test]
fn needs_meta_description_no_content_attr() {
    let missing = needs_meta_description(r#"<meta name="description">"#);
    assert!(!missing);
}
2100
/// A long-enough description is found even when another meta tag precedes it.
#[test]
fn needs_meta_description_multiple_meta_tags() {
    let missing = needs_meta_description(
        r#"<meta name="author" content="Alice"><meta name="description" content="This is a sufficiently long description that is more than fifty characters long">"#,
    );
    assert!(!missing);
}
2106
/// An empty `content` attribute is flagged as needing a description.
#[test]
fn needs_meta_description_empty_content() {
    let missing =
        needs_meta_description(r#"<meta name="description" content="">"#);
    assert!(missing);
}
2112
/// `inject_meta_description` must HTML-escape `&` in the injected description.
#[test]
fn inject_meta_description_escapes_ampersand() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_meta_description(html, "Tom & Jerry");
    // The expected text is the escaped form; a raw `&` here would contradict
    // the test's purpose.
    // NOTE(review): assumes the named entity `&amp;` is emitted — confirm
    // against `inject_meta_description`.
    assert!(result.contains("Tom &amp; Jerry"));
}
2121
/// `inject_meta_description` must HTML-escape double quotes in the injected description.
#[test]
fn inject_meta_description_escapes_quotes() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_meta_description(html, r#"A "great" page"#);
    // Unescaped quotes inside the literal would terminate the Rust string
    // early, so the expected text must be the entity form.
    // NOTE(review): assumes the named entity `&quot;` is emitted — confirm
    // against `inject_meta_description`.
    assert!(result.contains("A &quot;great&quot; page"));
}
2128
/// `inject_meta_description` must HTML-escape `<` in the injected description.
#[test]
fn inject_meta_description_escapes_angle_brackets() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_meta_description(html, "x < y");
    // The expected text is the escaped form; a raw `<` here would contradict
    // the test's purpose.
    // NOTE(review): assumes the named entity `&lt;` is emitted — confirm
    // against `inject_meta_description`.
    assert!(result.contains("x &lt; y"));
}
2135
/// `inject_meta_description` must escape `&`, `"`, `<` and `>` together.
#[test]
fn inject_meta_description_all_special_chars() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_meta_description(html, r#"A & B "C" <D>"#);
    // Unescaped quotes inside the literal would terminate the Rust string
    // early, so the expected text must be the entity form.
    // NOTE(review): assumes named entities are emitted — confirm against
    // `inject_meta_description`.
    assert!(result.contains("A &amp; B &quot;C&quot; &lt;D&gt;"));
}
2143
/// When `<main>` is present, text outside it is not extracted.
#[test]
fn extract_page_text_with_main_tag() {
    let extracted = extract_page_text(
        "<html><body><div>ignored</div><main><p>Main content here.</p></main></body></html>",
        500,
    );
    let has_main = extracted.contains("Main content here");
    let has_outside = extracted.contains("ignored");
    assert!(has_main);
    assert!(!has_outside);
}
2154
/// A 1000-byte body extracted with a limit of 50 stays within 60 bytes.
#[test]
fn extract_page_text_large_truncated() {
    let page = format!("<body><p>{}</p></body>", "word ".repeat(200));
    let extracted = extract_page_text(&page, 50);
    let length = extracted.len();
    assert!(length <= 60);
}
2163
/// NUL and SOH bytes are removed from the extracted text.
#[test]
fn extract_page_text_strips_control_chars() {
    let extracted = extract_page_text("<body>Hello\x00\x01World</body>", 100);
    assert_eq!(extracted, "HelloWorld");
}
2170
/// Text inside nested inline tags survives extraction.
#[test]
fn extract_page_text_nested_tags() {
    let extracted = extract_page_text(
        "<body><div><span>A</span> <em>B</em></div></body>",
        100,
    );
    for letter in ['A', 'B'] {
        assert!(extracted.contains(letter));
    }
}
2178
/// An image with `alt=""` and an unreachable endpoint yields a count of zero.
#[test]
fn generate_missing_alt_text_empty_alt() {
    let mut markup = String::from(
        r#"<html><body><img src="photo.jpg" alt=""></body></html>"#,
    );
    let page = Path::new("test.html");
    let site_root = Path::new(".");
    let generated = generate_missing_alt_text(
        &mut markup,
        "llama3",
        "http://localhost:99999",
        false,
        page,
        site_root,
    );
    assert_eq!(generated, 0);
}
2198
/// A malformed `<img` tag without a closing `>` yields a count of zero.
#[test]
fn generate_missing_alt_text_missing_closing_bracket() {
    let mut markup =
        String::from("<html><body><img src=\"photo.jpg\"</body></html>");
    let page = Path::new("test.html");
    let site_root = Path::new(".");
    let generated = generate_missing_alt_text(
        &mut markup,
        "llama3",
        "http://localhost:99999",
        false,
        page,
        site_root,
    );
    assert_eq!(generated, 0);
}
2213
/// A page mixing good, missing and empty alt attributes yields a count of zero here.
#[test]
fn generate_missing_alt_text_mixed_images() {
    let mut markup = String::from(
        r#"<html><body>
 <img src="a.jpg" alt="Good alt">
 <img src="b.jpg">
 <img src="c.jpg" alt="">
 </body></html>"#,
    );
    let page = Path::new("test.html");
    let site_root = Path::new(".");
    let generated = generate_missing_alt_text(
        &mut markup,
        "llama3",
        "http://localhost:99999",
        true,
        page,
        site_root,
    );
    assert_eq!(generated, 0);
}
2235
/// An image that already has alt text yields a count of zero.
#[test]
fn generate_missing_alt_text_with_alt_present() {
    let mut markup = String::from(
        r#"<html><body><img src="x.jpg" alt="Has alt text"></body></html>"#,
    );
    let page = Path::new("test.html");
    let site_root = Path::new(".");
    let generated = generate_missing_alt_text(
        &mut markup,
        "llama3",
        "http://localhost:99999",
        false,
        page,
        site_root,
    );
    assert_eq!(generated, 0);
}
2251
/// Underscore-separated locale tags resolve like their hyphenated forms.
#[test]
fn formula_from_lang_underscore_separator() {
    let english = ReadabilityFormula::from_lang("en_US");
    let german = ReadabilityFormula::from_lang("de_DE");
    assert_eq!(english, Some(ReadabilityFormula::FleschKincaid));
    assert_eq!(german, Some(ReadabilityFormula::WienerSachtextformel));
}
2265
/// `nn` and `no` both map to LIX.
#[test]
fn formula_from_lang_norwegian_variants() {
    for tag in ["nn", "no"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::Lix)
        );
    }
}
2277
/// Default config: model `llama3`, local endpoint on port 11434, dry-run off.
#[test]
fn llm_config_default_values() {
    let LlmConfig {
        model,
        endpoint,
        dry_run,
        ..
    } = LlmConfig::default();
    assert_eq!(model, "llama3");
    assert_eq!(endpoint, "http://localhost:11434");
    assert!(!dry_run);
}
2287
/// The `Debug` output names the plugin type and includes the configured model.
#[test]
fn llm_plugin_debug_impl() {
    let rendered = format!("{:?}", LlmPlugin::new(LlmConfig::default()));
    for needle in ["LlmPlugin", "llama3"] {
        assert!(rendered.contains(needle));
    }
}
2295
/// Leading whitespace before the opening `---` does not prevent the split.
#[test]
fn split_frontmatter_leading_whitespace() {
    let (front, rest) = split_frontmatter(" ---\ntitle: Hello\n---\nBody.");
    assert!(front.contains("title: Hello"));
    assert!(rest.contains("Body."));
}
2305
/// A `+++` header with no closing delimiter is treated as plain body text.
#[test]
fn split_frontmatter_toml_unclosed() {
    let doc = "+++\ntitle = \"Hello\"\nNo closing delimiter";
    let parts = split_frontmatter(doc);
    assert!(parts.0.is_empty());
    assert_eq!(parts.1, doc);
}
2313
/// A `FileAuditResult` serializes its `path` and `passes` fields to JSON.
#[test]
fn file_audit_result_serializes() {
    let entry = FileAuditResult {
        path: String::from("test.md"),
        grade_level: 7.5,
        reading_ease: 65.0,
        avg_sentence_len: 12.0,
        passes: true,
    };
    let encoded = serde_json::to_string(&entry).unwrap();
    for needle in ["\"path\":\"test.md\"", "\"passes\":true"] {
        assert!(encoded.contains(needle));
    }
}
2329
/// An `AuditReport` serializes its target grade and file count to JSON.
#[test]
fn audit_report_serializes() {
    let summary = AuditReport {
        target_grade: 8.0,
        total_files: 2,
        passing: 1,
        failing: 1,
        results: Vec::new(),
    };
    let encoded = serde_json::to_string(&summary).unwrap();
    for needle in ["\"target_grade\":8.0", "\"total_files\":2"] {
        assert!(encoded.contains(needle));
    }
}
2343
/// A description with `&`, `'` and `"` still produces a JSON-LD script containing the text.
#[test]
fn inject_jsonld_with_special_chars() {
    let page = "<html><head></head><body></body></html>";
    let injected = inject_jsonld_description(page, "Tom & Jerry's \"show\"");
    for needle in ["application/ld+json", "Tom & Jerry"] {
        assert!(injected.contains(needle));
    }
}
2353
/// "beautiful" counts as at least two syllables.
#[test]
fn count_syllables_multiple_vowel_groups() {
    let syllables = count_word_syllables("beautiful");
    assert!(syllables >= 2);
}
2361
/// "queue" counts as exactly one syllable.
#[test]
fn count_syllables_consecutive_vowels() {
    let syllables = count_word_syllables("queue");
    assert_eq!(syllables, 1);
}
2367
/// "rhythm" counts as exactly one syllable.
#[test]
fn count_syllables_all_consonants() {
    let syllables = count_word_syllables("rhythm");
    assert_eq!(syllables, 1);
}
2373
/// A six-word sentence totals at least six syllables.
#[test]
fn count_syllables_text_total() {
    let syllables = count_syllables("The cat sat on the mat.");
    assert!(syllables >= 6);
}
2379
/// Word counts for a phrase, the empty string, blank input and a single word.
#[test]
fn count_words_basic() {
    let cases = [("one two three", 3), ("", 0), (" ", 0), ("single", 1)];
    for (input, expected) in cases {
        assert_eq!(count_words(input), expected);
    }
}
2387
/// A minimal sentence gives a non-negative grade and an ease within [0, 100].
#[test]
fn readability_grade_never_negative() {
    let tiny = ReadabilityAudit::analyze("Hi.");
    assert!(tiny.grade_level >= 0.0);
    assert!((0.0..=100.0).contains(&tiny.reading_ease));
}
2398
/// Very short sentences keep the reading ease within [0, 100].
#[test]
fn readability_ease_clamped_to_100() {
    let ease = ReadabilityAudit::analyze("Go. Do. Be.").reading_ease;
    assert!((0.0..=100.0).contains(&ease));
}
2406}