ssg/
llm.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Local LLM content plugin.
5//!
6//! Invokes a local LLM (Ollama, llama.cpp) at build time to auto-generate:
7//! - `alt` text for images missing it
8//! - `meta description` for pages where it's empty or < 50 chars
9//! - JSON-LD `description` fields from page content
10//!
11//! Configured via the `[ai]` section in `ssg.toml`:
12//! ```toml
13//! [ai]
14//! model = "llama3"
15//! endpoint = "http://localhost:11434"
16//! ```
17//!
18//! Graceful fallback: if no LLM is reachable, logs a warning and skips.
19
20use crate::plugin::{Plugin, PluginContext};
21use anyhow::Result;
22use std::{fs, path::Path, process::Command};
23
/// Settings that drive the LLM plugin.
///
/// Use [`LlmConfig::default`] for a configuration that targets a
/// local Ollama instance on its standard port.
#[derive(Debug, Clone)]
pub struct LlmConfig {
    /// Name of the model to request (e.g., `"llama3"`, `"mistral"`).
    pub model: String,
    /// Base URL of the Ollama API.
    pub endpoint: String,
    /// When `true`, generated text is logged and no file is written.
    pub dry_run: bool,
    /// Readability grade that content should not exceed (default: 8.0).
    pub target_grade: f64,
    /// Upper bound on refinement attempts when generated text is still
    /// above `target_grade` (default: 1).
    pub max_refinement_attempts: usize,
}

impl Default for LlmConfig {
    /// Returns the stock configuration: model `llama3`, endpoint
    /// `http://localhost:11434`, writes enabled, target grade 8.0 and a
    /// single refinement attempt.
    fn default() -> Self {
        let model = String::from("llama3");
        let endpoint = String::from("http://localhost:11434");

        Self {
            model,
            endpoint,
            dry_run: false,
            target_grade: 8.0,
            max_refinement_attempts: 1,
        }
    }
}
50
51/// Plugin that uses a local LLM to augment content at build time.
52#[derive(Debug)]
53pub struct LlmPlugin {
54    config: LlmConfig,
55}
56
57impl LlmPlugin {
58    /// Creates a new `LlmPlugin` with the given configuration.
59    #[must_use]
60    pub const fn new(config: LlmConfig) -> Self {
61        Self { config }
62    }
63}
64
65/// Result of auditing a single file's readability.
66#[derive(Debug, Clone, serde::Serialize)]
67pub struct FileAuditResult {
68    /// Relative file path.
69    pub path: String,
70    /// Flesch-Kincaid Grade Level.
71    pub grade_level: f64,
72    /// Flesch Reading Ease score.
73    pub reading_ease: f64,
74    /// Average words per sentence.
75    pub avg_sentence_len: f64,
76    /// Whether it passes the target grade threshold.
77    pub passes: bool,
78}
79
80/// Aggregated readability audit report.
81#[derive(Debug, Clone, serde::Serialize)]
82pub struct AuditReport {
83    /// Target grade level used for pass/fail.
84    pub target_grade: f64,
85    /// Total files scanned.
86    pub total_files: usize,
87    /// Files that pass the readability threshold.
88    pub passing: usize,
89    /// Files that exceed the readability threshold.
90    pub failing: usize,
91    /// Per-file results.
92    pub results: Vec<FileAuditResult>,
93}
94
95/// Result of the agentic AI fix pipeline for a single file.
96#[derive(Debug, Clone, serde::Serialize)]
97pub struct AiFixResult {
98    /// Relative file path.
99    pub path: String,
100    /// Grade level before fix attempt.
101    pub before_grade: f64,
102    /// Grade level after fix attempt (same as before if not improved).
103    pub after_grade: f64,
104    /// Whether the fix improved readability.
105    pub improved: bool,
106    /// Action taken: "rewritten", "skipped", "no-improvement", "ollama-unavailable".
107    pub action: String,
108}
109
110/// Aggregated report from the agentic AI fix pipeline.
111#[derive(Debug, Clone, serde::Serialize)]
112pub struct AiFixReport {
113    /// Total files audited.
114    pub total_audited: usize,
115    /// Files that failed the readability threshold.
116    pub total_failing: usize,
117    /// Files successfully improved.
118    pub total_fixed: usize,
119    /// Per-file results.
120    pub results: Vec<AiFixResult>,
121}
122
123impl LlmPlugin {
124    /// Audits all Markdown files in a directory for readability.
125    ///
126    /// Returns a structured report with per-file Flesch-Kincaid scores.
127    /// Does not require an LLM — uses the local `ReadabilityAudit` engine.
128    ///
129    /// **Note:** The syllable heuristic is English-only. Non-English
130    /// content (Bengali, Hindi, Turkish, etc.) produces inflated scores.
131    /// Use the `en/` subdirectory for accurate results on multilingual
132    /// repos, or filter results by locale.
133    pub fn audit_all(
134        content_dir: &Path,
135        target_grade: f64,
136    ) -> Result<AuditReport> {
137        let md_files =
138            crate::walk::walk_files(content_dir, "md").unwrap_or_default();
139
140        let mut results = Vec::with_capacity(md_files.len());
141
142        for path in &md_files {
143            let Ok(content) = fs::read_to_string(path) else {
144                continue; // File may have been removed by a concurrent test
145            };
146            // Strip frontmatter before auditing prose
147            let body = strip_frontmatter(&content);
148            // Detect language from frontmatter
149            let lang = extract_frontmatter_lang(&content);
150            let audit = ReadabilityAudit::analyze_with_lang(&body, &lang);
151            let rel = path
152                .strip_prefix(content_dir)
153                .unwrap_or(path)
154                .to_string_lossy()
155                .to_string();
156
157            results.push(FileAuditResult {
158                path: rel,
159                grade_level: (audit.grade_level * 10.0).round() / 10.0,
160                reading_ease: (audit.reading_ease * 10.0).round() / 10.0,
161                avg_sentence_len: (audit.avg_sentence_len * 10.0).round()
162                    / 10.0,
163                passes: audit.grade_level <= target_grade,
164            });
165        }
166
167        let passing = results.iter().filter(|r| r.passes).count();
168        let failing = results.len() - passing;
169
170        Ok(AuditReport {
171            target_grade,
172            total_files: results.len(),
173            passing,
174            failing,
175            results,
176        })
177    }
178
179    /// Audits and rewrites failing Markdown files via LLM refinement.
180    ///
181    /// For each file that exceeds `target_grade`:
182    /// 1. Extracts the prose body (strips frontmatter)
183    /// 2. Sends it to the LLM with a simplification prompt
184    /// 3. If the refined version scores better, writes it back
185    ///    (preserving the original frontmatter)
186    /// 4. If `dry_run`, prints the diff without writing
187    ///
188    /// Returns the number of files rewritten.
189    pub fn audit_and_fix(
190        content_dir: &Path,
191        config: &LlmConfig,
192    ) -> Result<usize> {
193        if !is_ollama_available(&config.endpoint) {
194            log::warn!(
195                "[llm] Ollama not reachable at {}, skipping auto-fix",
196                config.endpoint
197            );
198            return Ok(0);
199        }
200
201        let report = Self::audit_all(content_dir, config.target_grade)?;
202        let failing: Vec<_> =
203            report.results.iter().filter(|r| !r.passes).collect();
204
205        if failing.is_empty() {
206            log::info!(
207                "[llm] All {} file(s) pass grade {:.0}",
208                report.total_files,
209                config.target_grade
210            );
211            return Ok(0);
212        }
213
214        log::info!(
215            "[llm] {} file(s) exceed grade {:.0}, attempting refinement",
216            failing.len(),
217            config.target_grade
218        );
219
220        let mut rewritten = 0usize;
221
222        for result in &failing {
223            let path = content_dir.join(&result.path);
224            let original = fs::read_to_string(&path)?;
225            let (frontmatter_block, body) = split_frontmatter(&original);
226            let body_trimmed = body.trim();
227
228            if body_trimmed.is_empty() {
229                continue;
230            }
231
232            let prompt = format!(
233                "Rewrite this Markdown content at a 6th-grade reading level. \
234                 Rules:\n\
235                 - Max 20 words per sentence\n\
236                 - Max 4 sentences per paragraph\n\
237                 - Use simple, common words\n\
238                 - Keep ALL facts, numbers, dates, and code blocks exactly the same\n\
239                 - Keep ALL Markdown headings (#, ##, ###) and formatting\n\
240                 - Return ONLY the rewritten Markdown, nothing else\n\n\
241                 {body_trimmed}"
242            );
243
244            if let Some(refined) = generate_with_refinement(
245                &config.endpoint,
246                &config.model,
247                &prompt,
248                config.target_grade,
249                config.max_refinement_attempts,
250            ) {
251                let refined_audit = ReadabilityAudit::analyze(&refined);
252                let original_audit = ReadabilityAudit::analyze(body_trimmed);
253
254                if refined_audit.grade_level < original_audit.grade_level {
255                    if config.dry_run {
256                        log::info!(
257                            "[llm] [dry-run] {}: grade {:.1} → {:.1}",
258                            result.path,
259                            original_audit.grade_level,
260                            refined_audit.grade_level
261                        );
262                    } else {
263                        // Reassemble: frontmatter + refined body
264                        let output =
265                            format!("{frontmatter_block}\n{refined}\n");
266                        fs::write(&path, output)?;
267                        log::info!(
268                            "[llm] Rewrote {}: grade {:.1} → {:.1}",
269                            result.path,
270                            original_audit.grade_level,
271                            refined_audit.grade_level
272                        );
273                        rewritten += 1;
274                    }
275                } else {
276                    log::warn!(
277                        "[llm] Could not improve {}: grade {:.1} (refined: {:.1})",
278                        result.path,
279                        original_audit.grade_level,
280                        refined_audit.grade_level
281                    );
282                }
283            }
284        }
285
286        Ok(rewritten)
287    }
288
289    /// Agentic pipeline: audit → diagnose → fix → verify → report.
290    ///
291    /// Like `audit_and_fix()` but returns a detailed JSON-serialisable
292    /// report with before/after scores for each file.
293    pub fn audit_and_fix_with_report(
294        content_dir: &Path,
295        config: &LlmConfig,
296    ) -> Result<AiFixReport> {
297        if !is_ollama_available(&config.endpoint) {
298            log::warn!(
299                "[ai-fix] Ollama not reachable at {}, skipping",
300                config.endpoint
301            );
302            return Ok(AiFixReport {
303                total_audited: 0,
304                total_failing: 0,
305                total_fixed: 0,
306                results: vec![],
307            });
308        }
309
310        let report = Self::audit_all(content_dir, config.target_grade)?;
311        let failing: Vec<_> =
312            report.results.iter().filter(|r| !r.passes).collect();
313        let mut fix_results = Vec::new();
314
315        for result in &failing {
316            let path = content_dir.join(&result.path);
317            let Ok(original) = fs::read_to_string(&path) else {
318                fix_results.push(AiFixResult {
319                    path: result.path.clone(),
320                    before_grade: result.grade_level,
321                    after_grade: result.grade_level,
322                    improved: false,
323                    action: "skipped".to_string(),
324                });
325                continue;
326            };
327            let (frontmatter_block, body) = split_frontmatter(&original);
328            let body_trimmed = body.trim();
329
330            if body_trimmed.is_empty() {
331                fix_results.push(AiFixResult {
332                    path: result.path.clone(),
333                    before_grade: result.grade_level,
334                    after_grade: result.grade_level,
335                    improved: false,
336                    action: "skipped".to_string(),
337                });
338                continue;
339            }
340
341            let prompt = format!(
342                "Rewrite this Markdown content at a 6th-grade reading level. \
343                 Rules:\n\
344                 - Max 20 words per sentence\n\
345                 - Max 4 sentences per paragraph\n\
346                 - Use simple, common words\n\
347                 - Keep ALL facts, numbers, dates, and code blocks exactly the same\n\
348                 - Keep ALL Markdown headings (#, ##, ###) and formatting\n\
349                 - Return ONLY the rewritten Markdown, nothing else\n\n\
350                 {body_trimmed}"
351            );
352
353            if let Some(refined) = generate_with_refinement(
354                &config.endpoint,
355                &config.model,
356                &prompt,
357                config.target_grade,
358                config.max_refinement_attempts,
359            ) {
360                let refined_audit = ReadabilityAudit::analyze(&refined);
361                let original_audit = ReadabilityAudit::analyze(body_trimmed);
362
363                if refined_audit.grade_level < original_audit.grade_level {
364                    if !config.dry_run {
365                        let output =
366                            format!("{frontmatter_block}\n{refined}\n");
367                        fs::write(&path, output)?;
368                    }
369                    fix_results.push(AiFixResult {
370                        path: result.path.clone(),
371                        before_grade: (original_audit.grade_level * 10.0)
372                            .round()
373                            / 10.0,
374                        after_grade: (refined_audit.grade_level * 10.0).round()
375                            / 10.0,
376                        improved: true,
377                        action: if config.dry_run {
378                            "dry-run".to_string()
379                        } else {
380                            "rewritten".to_string()
381                        },
382                    });
383                } else {
384                    fix_results.push(AiFixResult {
385                        path: result.path.clone(),
386                        before_grade: (original_audit.grade_level * 10.0)
387                            .round()
388                            / 10.0,
389                        after_grade: (refined_audit.grade_level * 10.0).round()
390                            / 10.0,
391                        improved: false,
392                        action: "no-improvement".to_string(),
393                    });
394                }
395            } else {
396                fix_results.push(AiFixResult {
397                    path: result.path.clone(),
398                    before_grade: result.grade_level,
399                    after_grade: result.grade_level,
400                    improved: false,
401                    action: "skipped".to_string(),
402                });
403            }
404        }
405
406        let total_fixed = fix_results.iter().filter(|r| r.improved).count();
407
408        Ok(AiFixReport {
409            total_audited: report.total_files,
410            total_failing: failing.len(),
411            total_fixed,
412            results: fix_results,
413        })
414    }
415}
416
/// Splits content into `(frontmatter_block, body)`.
///
/// Frontmatter starts with `---` or `+++` (after optional leading
/// whitespace) and ends at the first later line that consists solely
/// of the same delimiter (trailing whitespace allowed). Delimiter
/// characters that merely appear inside a value, such as
/// `title: a --- b`, do not end the block.
///
/// The frontmatter block includes the leading whitespace and both
/// delimiters so it can be reassembled verbatim; the body is
/// everything after the closing delimiter. Returns `("", content)` if
/// no complete frontmatter is found.
fn split_frontmatter(content: &str) -> (String, String) {
    let trimmed = content.trim_start();
    let leading_ws = &content[..content.len() - trimmed.len()];

    for delim in ["---", "+++"] {
        let Some(rest) = trimmed.strip_prefix(delim) else {
            continue;
        };

        // Byte offset (within `rest`) of the closing delimiter line.
        // Segment 0 is the remainder of the opening line and can never
        // be the closer.
        let mut offset = 0;
        let mut closing = None;
        for (idx, line) in rest.split_inclusive('\n').enumerate() {
            if idx > 0 && line.trim_end() == delim {
                closing = Some(offset);
                break;
            }
            offset += line.len();
        }

        if let Some(end) = closing {
            let fm_end = delim.len() + end + delim.len();
            return (
                format!("{leading_ws}{}", &trimmed[..fm_end]),
                trimmed[fm_end..].to_string(),
            );
        }
    }

    (String::new(), content.to_string())
}
442
/// Extracts the `language` or `lang` field from YAML/TOML frontmatter.
///
/// Frontmatter starts with `---` or `+++` (after optional leading
/// whitespace) and ends at the first later line that consists solely
/// of the same delimiter. Inside it, each line is split at its first
/// `:` or `=`; the line matches when the key is exactly `language` or
/// `lang`, so look-alike keys such as `languages` are ignored.
/// Surrounding whitespace and quotes are removed from the value.
///
/// Returns the first non-empty value found, or an empty string when
/// there is no frontmatter or no language field.
fn extract_frontmatter_lang(content: &str) -> String {
    let trimmed = content.trim_start();

    for delim in ["---", "+++"] {
        let Some(rest) = trimmed.strip_prefix(delim) else {
            continue;
        };

        // Byte offset (within `rest`) of the closing delimiter line.
        // Segment 0 is the remainder of the opening line and can never
        // be the closer.
        let mut offset = 0;
        let mut closing = None;
        for (idx, line) in rest.split_inclusive('\n').enumerate() {
            if idx > 0 && line.trim_end() == delim {
                closing = Some(offset);
                break;
            }
            offset += line.len();
        }
        let Some(end) = closing else {
            continue;
        };

        for line in rest[..end].lines() {
            // YAML uses `key: value`, TOML uses `key = value`.
            let Some((key, value)) = line.split_once([':', '=']) else {
                continue;
            };
            if !matches!(key.trim(), "language" | "lang") {
                continue;
            }
            let value = value.trim().trim_matches('"').trim_matches('\'');
            if !value.is_empty() {
                return value.to_string();
            }
        }
    }

    String::new()
}
485
/// Strips YAML/TOML frontmatter from Markdown content.
///
/// Frontmatter starts with `---` or `+++` (after optional leading
/// whitespace) and ends at the first later line that consists solely
/// of the same delimiter (trailing whitespace allowed). Delimiter
/// characters inside a value, such as `title: a --- b`, do not end
/// the block.
///
/// Returns everything after the closing delimiter, or the content
/// unchanged when no complete frontmatter is found.
fn strip_frontmatter(content: &str) -> String {
    let trimmed = content.trim_start();

    for delim in ["---", "+++"] {
        let Some(rest) = trimmed.strip_prefix(delim) else {
            continue;
        };

        // Segment 0 is the remainder of the opening line and can never
        // be the closer.
        let mut offset = 0;
        for (idx, line) in rest.split_inclusive('\n').enumerate() {
            if idx > 0 && line.trim_end() == delim {
                return rest[offset + delim.len()..].to_string();
            }
            offset += line.len();
        }
    }

    content.to_string()
}
498
499impl Plugin for LlmPlugin {
500    fn name(&self) -> &'static str {
501        "llm"
502    }
503
504    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
505        if !ctx.site_dir.exists() {
506            return Ok(());
507        }
508
509        // Check if Ollama is available
510        if !is_ollama_available(&self.config.endpoint) {
511            log::warn!(
512                "[llm] Ollama not reachable at {}, skipping AI augmentation",
513                self.config.endpoint
514            );
515            return Ok(());
516        }
517
518        let html_files = ctx.get_html_files();
519        let mut augmented = 0usize;
520
521        for path in &html_files {
522            let html = fs::read_to_string(path)?;
523            let mut modified = html.clone();
524
525            // Auto-generate meta descriptions for pages with short/missing ones
526            if needs_meta_description(&modified) {
527                if let Some(desc) = generate_meta_description(
528                    &modified,
529                    &self.config.model,
530                    &self.config.endpoint,
531                    self.config.target_grade,
532                    self.config.max_refinement_attempts,
533                ) {
534                    let audit = ReadabilityAudit::analyze(&desc);
535                    if self.config.dry_run {
536                        let rel = path
537                            .strip_prefix(&ctx.site_dir)
538                            .unwrap_or(path)
539                            .display();
540                        log::info!(
541                            "[llm] [dry-run] {rel}: description = {desc}"
542                        );
543                        log::info!(
544                            "[llm] [dry-run] {rel}: grade={:.1}, ease={:.1}, avg_sentence={:.1}",
545                            audit.grade_level, audit.reading_ease, audit.avg_sentence_len
546                        );
547                    } else {
548                        modified = inject_meta_description(&modified, &desc);
549                        // Also populate JSON-LD Article description
550                        modified = inject_jsonld_description(&modified, &desc);
551                    }
552                }
553            }
554
555            // Auto-generate alt text for images missing it
556            let alt_count = generate_missing_alt_text(
557                &mut modified,
558                &self.config.model,
559                &self.config.endpoint,
560                self.config.dry_run,
561                path,
562                &ctx.site_dir,
563            );
564
565            if !self.config.dry_run && modified != html {
566                fs::write(path, &modified)?;
567                augmented += 1;
568            }
569
570            if alt_count > 0 {
571                augmented += 1;
572            }
573        }
574
575        if augmented > 0 {
576            log::info!(
577                "[llm] Augmented {augmented} page(s) with model '{}'",
578                self.config.model
579            );
580        }
581
582        Ok(())
583    }
584}
585
/// Reports whether an HTTP request to `endpoint` succeeds.
///
/// The probe shells out to `curl -sf --max-time 2 <endpoint>` and
/// returns `true` only when the process could be started and exited
/// successfully. Any failure, including `curl` not being runnable,
/// yields `false`.
fn is_ollama_available(endpoint: &str) -> bool {
    let probe = Command::new("curl")
        .arg("-sf")
        .arg("--max-time")
        .arg("2")
        .arg(endpoint)
        .output();

    match probe {
        Ok(output) => output.status.success(),
        Err(_) => false,
    }
}
594
/// Returns true if the page needs a meta description (missing or
/// shorter than 50 characters).
///
/// The check looks for the first `name="description"` attribute and
/// the first `content="…"` value that follows it. Length is measured
/// in Unicode scalar values rather than bytes, so non-ASCII
/// descriptions are not over-counted.
///
/// A `name="description"` attribute with no `content="…"` value after
/// it is treated as not needing a description.
fn needs_meta_description(html: &str) -> bool {
    const NAME_ATTR: &str = "name=\"description\"";
    const CONTENT_ATTR: &str = "content=\"";
    const MIN_CHARS: usize = 50;

    // No description meta tag at all.
    let Some(name_pos) = html.find(NAME_ATTR) else {
        return true;
    };

    let after_name = &html[name_pos..];
    after_name
        .find(CONTENT_ATTR)
        .map(|idx| &after_name[idx + CONTENT_ATTR.len()..])
        .and_then(|value| value.find('"').map(|end| &value[..end]))
        .is_some_and(|desc| desc.chars().count() < MIN_CHARS)
}
609
610/// Generates a meta description via LLM with readability refinement.
611fn generate_meta_description(
612    html: &str,
613    model: &str,
614    endpoint: &str,
615    target_grade: f64,
616    max_attempts: usize,
617) -> Option<String> {
618    let text = extract_page_text(html, 500);
619    if text.len() < 20 {
620        return None;
621    }
622
623    let prompt = format!(
624        "Write a concise SEO meta description (120-155 characters) for this page content. \
625         Use simple words and short sentences. \
626         Return ONLY the description text, no quotes or explanation:\n\n{text}"
627    );
628
629    generate_with_refinement(
630        endpoint,
631        model,
632        &prompt,
633        target_grade,
634        max_attempts,
635    )
636}
637
/// Sets the page's meta description.
///
/// `description` is escaped (`&`, `"`, `<`) before it is placed in the
/// attribute value.
///
/// - If the page already has a `<meta name="description" … content="…">`
///   tag, its `content` value is replaced, so the page never ends up
///   with two description tags. The `content` attribute is only
///   reused when it follows `name="description"` inside the same tag.
/// - Otherwise a new tag is inserted immediately before `</head>`.
/// - If neither applies (no `</head>`), the HTML is returned unchanged.
fn inject_meta_description(html: &str, description: &str) -> String {
    const NAME_ATTR: &str = "name=\"description\"";
    const CONTENT_ATTR: &str = "content=\"";

    let escaped = description
        .replace('&', "&amp;")
        .replace('"', "&quot;")
        .replace('<', "&lt;");

    // Byte range of an existing description's `content` value, if any.
    let existing_value = html.find(NAME_ATTR).and_then(|name_pos| {
        let after_name = &html[name_pos..];
        let rel = after_name.find(CONTENT_ATTR)?;
        // A `>` in between means the `content` belongs to a later tag.
        if after_name[..rel].contains('>') {
            return None;
        }
        let start = name_pos + rel + CONTENT_ATTR.len();
        let len = html[start..].find('"')?;
        Some(start..start + len)
    });

    let mut result = html.to_string();
    if let Some(range) = existing_value {
        result.replace_range(range, &escaped);
    } else if let Some(pos) = html.find("</head>") {
        let tag =
            format!("<meta name=\"description\" content=\"{escaped}\">\n");
        result.insert_str(pos, &tag);
    }
    result
}
654
655/// Generates alt text for images that are missing it.
656fn generate_missing_alt_text(
657    html: &mut String,
658    model: &str,
659    endpoint: &str,
660    dry_run: bool,
661    path: &Path,
662    site_dir: &Path,
663) -> usize {
664    let mut count = 0;
665    let mut search_from = 0;
666
667    while let Some(start) = html[search_from..].find("<img") {
668        let abs_start = search_from + start;
669        let Some(tag_end) = html[abs_start..].find('>') else {
670            break;
671        };
672        let tag_end_abs = abs_start + tag_end + 1;
673        let tag = &html[abs_start..tag_end_abs];
674
675        if !tag.contains("alt=") || tag.contains("alt=\"\"") {
676            // Extract src for context
677            let src = extract_attr(tag, "src").unwrap_or_default();
678            let prompt = format!(
679                "Describe this image for an alt text attribute. The image file is named '{}'. \
680                 Return ONLY the alt text (max 125 characters), no quotes:\n",
681                src
682            );
683
684            if let Some(alt) = call_ollama(endpoint, model, &prompt) {
685                let alt = alt.trim().replace('"', "&quot;");
686                if dry_run {
687                    let rel =
688                        path.strip_prefix(site_dir).unwrap_or(path).display();
689                    log::info!(
690                        "[llm] [dry-run] {rel}: alt=\"{alt}\" for {src}"
691                    );
692                } else {
693                    // Replace the tag with one that has alt text
694                    let new_tag = if tag.contains("alt=\"\"") {
695                        tag.replace("alt=\"\"", &format!("alt=\"{alt}\""))
696                    } else {
697                        tag.replace("<img", &format!("<img alt=\"{alt}\""))
698                    };
699                    html.replace_range(abs_start..tag_end_abs, &new_tag);
700                }
701                count += 1;
702            }
703        }
704
705        search_from = tag_end_abs;
706    }
707
708    count
709}
710
/// Extracts plain text from HTML for LLM prompting.
///
/// Extraction starts at the first `<main` (or, failing that, the first
/// `<body`, or the start of the document). Everything between `<` and
/// `>` is dropped. Runs of whitespace, including newlines and tabs,
/// collapse to a single space so that words on different lines stay
/// separate; other control characters are removed.
///
/// The result holds at most `max_chars` characters (Unicode scalar
/// values) and has no leading or trailing whitespace.
fn extract_page_text(html: &str, max_chars: usize) -> String {
    let body_start = html
        .find("<main")
        .or_else(|| html.find("<body"))
        .unwrap_or(0);
    let body = &html[body_start..];

    let mut text = String::with_capacity(max_chars.min(body.len()));
    let mut kept = 0usize; // characters pushed so far
    let mut in_tag = false;
    let mut pending_space = false; // whitespace seen since the last word

    for ch in body.chars() {
        if kept >= max_chars {
            break;
        }
        match ch {
            '<' => in_tag = true,
            '>' => in_tag = false,
            _ if in_tag => {}
            _ if ch.is_whitespace() => pending_space = true,
            _ if ch.is_control() => {}
            _ => {
                // A separator is only emitted between two words, and
                // only if the following character still fits.
                let needs_space = pending_space && !text.is_empty();
                if kept + usize::from(needs_space) + 1 > max_chars {
                    break;
                }
                if needs_space {
                    text.push(' ');
                    kept += 1;
                }
                pending_space = false;
                text.push(ch);
                kept += 1;
            }
        }
    }

    text
}
735
/// Extracts an attribute value from an HTML tag.
///
/// Looks for `attr="value"` or `attr='value'` inside `tag` and returns
/// the value of the first match. The attribute name must not be the
/// tail of a longer name: asking for `src` does not match `data-src`.
///
/// Returns `None` if the attribute is absent, unquoted, or its closing
/// quote is missing.
fn extract_attr(tag: &str, attr: &str) -> Option<String> {
    let needle = format!("{attr}=");

    tag.match_indices(&needle).find_map(|(pos, _)| {
        // Reject matches that continue a longer attribute name.
        let continues_name = tag[..pos].chars().next_back().is_some_and(|c| {
            c.is_alphanumeric() || matches!(c, '-' | '_' | ':' | '.')
        });
        if continues_name {
            return None;
        }

        let after_eq = &tag[pos + needle.len()..];
        let quote = after_eq
            .chars()
            .next()
            .filter(|&c| c == '"' || c == '\'')?;
        // Both quote characters are one byte long.
        let value = &after_eq[1..];
        value.find(quote).map(|end| value[..end].to_string())
    })
}
743
744// =====================================================================
745// Readability intelligence
746// =====================================================================
747
/// Readability formula to apply, chosen from the content language.
///
/// The enum is `#[non_exhaustive]` so that further formulae
/// (Dale-Chall, Linsear-Write, Coleman-Liau) can be added in minor
/// versions without breaking downstream `match` expressions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ReadabilityFormula {
    /// Flesch-Kincaid, used for English.
    FleschKincaid,
    /// Kandel-Moles, used for French.
    KandelMoles,
    /// Wiener Sachtextformel, used for German.
    WienerSachtextformel,
    /// Gulpease index, used for Italian.
    Gulpease,
    /// LIX readability, used for Swedish and other Scandinavian languages.
    Lix,
    /// Fernández Huerta, used for Spanish.
    FernandezHuerta,
}

impl ReadabilityFormula {
    /// Picks the formula that matches a language code.
    ///
    /// Only the primary subtag matters: everything from the first `-`
    /// or `_` onwards is ignored and the comparison is
    /// case-insensitive, so `"en"`, `"fr"`, `"de-AT"` and `"FR_ca"`
    /// are all accepted. Returns `None` for languages without a
    /// dedicated formula.
    #[must_use]
    pub fn from_lang(lang: &str) -> Option<Self> {
        let primary = lang
            .split_once(['-', '_'])
            .map_or(lang, |(head, _)| head)
            .to_lowercase();

        let formula = match primary.as_str() {
            "en" => Self::FleschKincaid,
            "fr" => Self::KandelMoles,
            "de" => Self::WienerSachtextformel,
            "it" => Self::Gulpease,
            "es" => Self::FernandezHuerta,
            "sv" | "nb" | "nn" | "da" | "no" => Self::Lix,
            _ => return None,
        };

        Some(formula)
    }
}
788
/// Readability scores computed for a piece of text.
///
/// `ReadabilityAudit::analyze` fills these with the Flesch-Kincaid
/// metrics; language-specific analysis may derive them from another
/// formula.
#[derive(Clone, Copy, Debug)]
pub struct ReadabilityAudit {
    /// Grade level of the text (lower = simpler, never negative).
    pub grade_level: f64,
    /// Reading ease of the text (higher = easier, within 0–100).
    pub reading_ease: f64,
    /// Mean number of words per sentence.
    pub avg_sentence_len: f64,
}
799
800impl ReadabilityAudit {
801    /// Analyzes text and returns readability metrics.
802    #[must_use]
803    pub fn analyze(text: &str) -> Self {
804        let words = count_words(text);
805        let sentences = count_sentences(text);
806        let syllables = count_syllables(text);
807
808        if words == 0 || sentences == 0 {
809            return Self {
810                grade_level: 0.0,
811                reading_ease: 100.0,
812                avg_sentence_len: 0.0,
813            };
814        }
815
816        let wps = words as f64 / sentences as f64;
817        let spw = syllables as f64 / words as f64;
818
819        let grade = 0.39f64.mul_add(wps, 11.8f64.mul_add(spw, -15.59));
820        let ease = (-1.015f64).mul_add(wps, (-84.6f64).mul_add(spw, 206.835));
821
822        Self {
823            grade_level: grade.max(0.0),
824            reading_ease: ease.clamp(0.0, 100.0),
825            avg_sentence_len: wps,
826        }
827    }
828
829    /// Analyzes text using the appropriate formula for the given language.
830    ///
831    /// Falls back to Flesch-Kincaid if the language is unsupported or empty.
832    #[must_use]
833    pub fn analyze_with_lang(text: &str, lang: &str) -> Self {
834        let formula = if lang.is_empty() {
835            ReadabilityFormula::FleschKincaid
836        } else {
837            ReadabilityFormula::from_lang(lang)
838                .unwrap_or(ReadabilityFormula::FleschKincaid)
839        };
840
841        let words = count_words(text);
842        let sentences = count_sentences(text);
843        let syllables = count_syllables(text);
844        let chars: usize = text.chars().filter(|c| c.is_alphanumeric()).count();
845
846        if words == 0 || sentences == 0 {
847            return Self {
848                grade_level: 0.0,
849                reading_ease: 100.0,
850                avg_sentence_len: 0.0,
851            };
852        }
853
854        let wps = words as f64 / sentences as f64;
855        let spw = syllables as f64 / words as f64;
856
857        match formula {
858            ReadabilityFormula::FleschKincaid => Self::analyze(text),
859
860            ReadabilityFormula::KandelMoles => {
861                // Kandel-Moles reading ease (French)
862                let ease = 68.0f64.mul_add(-spw, 1.15f64.mul_add(-wps, 209.0));
863                Self {
864                    grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
865                        .max(0.0),
866                    reading_ease: ease.clamp(0.0, 100.0),
867                    avg_sentence_len: wps,
868                }
869            }
870
871            ReadabilityFormula::WienerSachtextformel => {
872                // Wiener Sachtextformel (German)
873                let word_list: Vec<&str> = text.split_whitespace().collect();
874                let total = word_list.len().max(1) as f64;
875                let pct_3plus_syl = word_list
876                    .iter()
877                    .filter(|w| count_word_syllables(w) >= 3)
878                    .count() as f64
879                    / total
880                    * 100.0;
881                let pct_6plus_char = word_list
882                    .iter()
883                    .filter(|w| {
884                        w.chars().filter(|c| c.is_alphabetic()).count() > 6
885                    })
886                    .count() as f64
887                    / total
888                    * 100.0;
889                let pct_1syl = word_list
890                    .iter()
891                    .filter(|w| count_word_syllables(w) == 1)
892                    .count() as f64
893                    / total
894                    * 100.0;
895
896                let grade = 0.1935f64.mul_add(
897                    pct_3plus_syl,
898                    0.1672f64.mul_add(
899                        wps,
900                        (-0.1297f64).mul_add(
901                            pct_6plus_char,
902                            (-0.0327f64).mul_add(pct_1syl, -0.875),
903                        ),
904                    ),
905                );
906
907                Self {
908                    grade_level: grade.max(0.0),
909                    reading_ease: grade
910                        .clamp(0.0, 20.0)
911                        .mul_add(-5.0, 100.0)
912                        .clamp(0.0, 100.0),
913                    avg_sentence_len: wps,
914                }
915            }
916
917            ReadabilityFormula::Gulpease => {
918                // Gulpease index (Italian)
919                let ease = 89.0
920                    + 10.0f64
921                        .mul_add(-(chars as f64), 300.0 * sentences as f64)
922                        / words as f64;
923                Self {
924                    grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
925                        .max(0.0),
926                    reading_ease: ease.clamp(0.0, 100.0),
927                    avg_sentence_len: wps,
928                }
929            }
930
931            ReadabilityFormula::Lix => {
932                // LIX (Swedish/Scandinavian)
933                let word_list: Vec<&str> = text.split_whitespace().collect();
934                let total = word_list.len().max(1) as f64;
935                let long_words = word_list
936                    .iter()
937                    .filter(|w| {
938                        w.chars().filter(|c| c.is_alphabetic()).count() > 6
939                    })
940                    .count() as f64;
941                let lix = wps + 100.0 * long_words / total;
942                // LIX scale: <25 very easy, 25-35 easy, 35-45 medium,
943                // 45-55 hard, >55 very hard
944                Self {
945                    grade_level: (lix / 5.0).max(0.0),
946                    reading_ease: (100.0 - lix).clamp(0.0, 100.0),
947                    avg_sentence_len: wps,
948                }
949            }
950
951            ReadabilityFormula::FernandezHuerta => {
952                // Fernández Huerta (Spanish)
953                let ease =
954                    1.02f64.mul_add(-wps, (-60.0f64).mul_add(spw, 206.84));
955                Self {
956                    grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
957                        .max(0.0),
958                    reading_ease: ease.clamp(0.0, 100.0),
959                    avg_sentence_len: wps,
960                }
961            }
962        }
963    }
964}
965
/// Returns the number of whitespace-delimited tokens in `text`.
///
/// Leading, trailing and repeated whitespace never produce empty tokens,
/// so an empty or all-whitespace string counts as zero words.
fn count_words(text: &str) -> usize {
    let mut total = 0usize;
    for _token in text.split_whitespace() {
        total += 1;
    }
    total
}
970
/// Estimates the sentence count as the number of `.`, `!` and `?`
/// characters in `text`.
///
/// Every terminator character counts individually, and the result is
/// never lower than 1 so callers can safely divide by it.
fn count_sentences(text: &str) -> usize {
    let terminators = text
        .matches(|c: char| matches!(c, '.' | '!' | '?'))
        .count();
    std::cmp::max(terminators, 1)
}
978
979/// Counts syllables using a lightweight heuristic:
980/// - Count vowel groups (consecutive vowels = 1 syllable)
981/// - Subtract silent trailing 'e'
982/// - Minimum 1 syllable per word
983fn count_syllables(text: &str) -> usize {
984    text.split_whitespace()
985        .map(|word| count_word_syllables(word))
986        .sum()
987}
988
/// Counts syllables in a single word.
///
/// The word is lowercased and stripped of non-alphabetic characters, then
/// each run of consecutive vowels (`a e i o u y`) counts as one syllable.
/// A trailing `e` on a word longer than two letters is treated as silent
/// when the word has more than one vowel group.
///
/// Only the six unaccented Latin vowels are recognised; every other
/// letter, including accented vowels, is treated as a consonant.
///
/// Always returns at least 1, including for empty input and for tokens
/// that contain no letters (such as numbers).
fn count_word_syllables(word: &str) -> usize {
    let lowered = word.to_lowercase();
    let letters: Vec<char> =
        lowered.chars().filter(|c| c.is_alphabetic()).collect();
    if letters.is_empty() {
        return 1;
    }

    let mut count = 0usize;
    let mut prev_vowel = false;

    for &ch in &letters {
        // Compare the `char` itself. Casting to `u8` keeps only the low
        // byte, which would misread e.g. 'š' (U+0161) as 'a' (0x61).
        let is_vowel = matches!(ch, 'a' | 'e' | 'i' | 'o' | 'u' | 'y');
        if is_vowel && !prev_vowel {
            count += 1;
        }
        prev_vowel = is_vowel;
    }

    // Subtract silent trailing 'e'
    if letters.len() > 2 && letters.last() == Some(&'e') && count > 1 {
        count -= 1;
    }

    count.max(1)
}
1016
1017/// Generates text via LLM with readability-driven refinement.
1018///
1019/// If the initial output exceeds `target_grade`, re-prompts the LLM
1020/// once to simplify. Keeps the best available draft on failure.
1021fn generate_with_refinement(
1022    endpoint: &str,
1023    model: &str,
1024    prompt: &str,
1025    target_grade: f64,
1026    max_attempts: usize,
1027) -> Option<String> {
1028    let mut text = call_ollama(endpoint, model, prompt)?;
1029    let mut audit = ReadabilityAudit::analyze(&text);
1030
1031    for attempt in 0..max_attempts {
1032        if audit.grade_level <= target_grade {
1033            break;
1034        }
1035
1036        log::info!(
1037            "[llm] Grade {:.1} exceeds target {:.1}, refining (attempt {})",
1038            audit.grade_level,
1039            target_grade,
1040            attempt + 1
1041        );
1042
1043        let simplify_prompt = format!(
1044            "Rewrite this text at a 6th-grade reading level. \
1045             Use short sentences (max 20 words). Use simple words. \
1046             Keep all facts and numbers exactly the same. \
1047             Return ONLY the rewritten text:\n\n{text}"
1048        );
1049
1050        if let Some(refined) = call_ollama(endpoint, model, &simplify_prompt) {
1051            let refined_audit = ReadabilityAudit::analyze(&refined);
1052            if refined_audit.grade_level < audit.grade_level {
1053                text = refined;
1054                audit = refined_audit;
1055            }
1056        }
1057    }
1058
1059    Some(text)
1060}
1061
1062// =====================================================================
1063// JSON-LD generation
1064// =====================================================================
1065
1066/// Injects or updates a JSON-LD `Article` script block in the HTML head.
1067///
1068/// Populates `description`, `datePublished`, and `author` from the page
1069/// content and frontmatter sidecar.
1070fn inject_jsonld_description(html: &str, description: &str) -> String {
1071    // Skip if JSON-LD Article already has a description
1072    if html.contains("\"@type\":\"Article\"")
1073        && html.contains("\"description\"")
1074    {
1075        return html.to_string();
1076    }
1077
1078    let jsonld = serde_json::json!({
1079        "@context": "https://schema.org",
1080        "@type": "Article",
1081        "description": description,
1082    });
1083
1084    let script =
1085        format!("<script type=\"application/ld+json\">{}</script>\n", jsonld);
1086
1087    if let Some(pos) = html.find("</head>") {
1088        let mut result = html.to_string();
1089        result.insert_str(pos, &script);
1090        result
1091    } else {
1092        html.to_string()
1093    }
1094}
1095
1096/// Calls the Ollama API to generate text.
1097fn call_ollama(endpoint: &str, model: &str, prompt: &str) -> Option<String> {
1098    let url = format!("{}/api/generate", endpoint.trim_end_matches('/'));
1099    let payload = serde_json::json!({
1100        "model": model,
1101        "prompt": prompt,
1102        "stream": false,
1103    });
1104
1105    let output = Command::new("curl")
1106        .args([
1107            "-sf",
1108            "--max-time",
1109            "30",
1110            "-X",
1111            "POST",
1112            &url,
1113            "-H",
1114            "Content-Type: application/json",
1115            "-d",
1116            &payload.to_string(),
1117        ])
1118        .output()
1119        .ok()?;
1120
1121    if !output.status.success() {
1122        return None;
1123    }
1124
1125    let response: serde_json::Value =
1126        serde_json::from_slice(&output.stdout).ok()?;
1127    response
1128        .get("response")
1129        .and_then(|v| v.as_str())
1130        .map(|s| s.trim().to_string())
1131        .filter(|s| !s.is_empty())
1132}
1133
1134#[cfg(test)]
1135#[allow(clippy::unwrap_used, clippy::expect_used)]
1136mod tests {
1137    use super::*;
1138
1139    #[test]
1140    fn needs_meta_description_missing() {
1141        assert!(needs_meta_description("<html><head></head></html>"));
1142    }
1143
1144    #[test]
1145    fn needs_meta_description_short() {
1146        let html = r#"<html><head><meta name="description" content="Short"></head></html>"#;
1147        assert!(needs_meta_description(html));
1148    }
1149
1150    #[test]
1151    fn needs_meta_description_adequate() {
1152        let html = r#"<html><head><meta name="description" content="This is a sufficiently long meta description that exceeds fifty characters easily"></head></html>"#;
1153        assert!(!needs_meta_description(html));
1154    }
1155
1156    #[test]
1157    fn inject_meta_description_into_head() {
1158        let html = "<html><head><title>T</title></head><body></body></html>";
1159        let result = inject_meta_description(html, "Test description");
1160        assert!(result.contains("name=\"description\""));
1161        assert!(result.contains("Test description"));
1162    }
1163
1164    #[test]
1165    fn extract_attr_basic() {
1166        assert_eq!(
1167            extract_attr(r#"<img src="photo.jpg" alt="x">"#, "src"),
1168            Some("photo.jpg".to_string())
1169        );
1170    }
1171
1172    #[test]
1173    fn extract_attr_missing() {
1174        assert_eq!(extract_attr(r#"<img src="x.jpg">"#, "alt"), None);
1175    }
1176
1177    #[test]
1178    fn extract_page_text_strips_tags() {
1179        let html = "<body><p>Hello <b>world</b></p></body>";
1180        let text = extract_page_text(html, 100);
1181        assert_eq!(text, "Hello world");
1182    }
1183
1184    #[test]
1185    fn llm_plugin_name() {
1186        let plugin = LlmPlugin::new(LlmConfig::default());
1187        assert_eq!(plugin.name(), "llm");
1188    }
1189
1190    // ── Readability engine tests ──────────────────────────────────
1191
1192    #[test]
1193    fn flesch_kincaid_simple_text() {
1194        // "The cat sat on the mat." — very simple, ~grade 1
1195        let audit = ReadabilityAudit::analyze("The cat sat on the mat.");
1196        assert!(
1197            audit.grade_level < 4.0,
1198            "Simple text should be below grade 4, got {:.1}",
1199            audit.grade_level
1200        );
1201        assert!(audit.reading_ease > 80.0);
1202    }
1203
1204    #[test]
1205    fn flesch_kincaid_complex_text() {
1206        let text = "The implementation of sophisticated cryptographic \
1207                    algorithms necessitates comprehensive understanding \
1208                    of mathematical foundations. Asymmetric encryption \
1209                    protocols demonstrate considerable computational \
1210                    overhead compared to symmetric alternatives.";
1211        let audit = ReadabilityAudit::analyze(text);
1212        assert!(
1213            audit.grade_level > 12.0,
1214            "Complex text should be above grade 12, got {:.1}",
1215            audit.grade_level
1216        );
1217    }
1218
1219    #[test]
1220    fn flesch_kincaid_empty_text() {
1221        let audit = ReadabilityAudit::analyze("");
1222        assert!(audit.grade_level.abs() < f64::EPSILON);
1223        assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
1224    }
1225
1226    #[test]
1227    fn syllable_count_known_words() {
1228        assert_eq!(count_word_syllables("cat"), 1);
1229        assert_eq!(count_word_syllables("hello"), 2);
1230        assert_eq!(count_word_syllables("beautiful"), 3);
1231        assert_eq!(count_word_syllables("implementation"), 5);
1232    }
1233
1234    #[test]
1235    fn count_sentences_basic() {
1236        assert_eq!(count_sentences("Hello. World!"), 2);
1237        assert_eq!(count_sentences("One sentence"), 1); // min 1
1238        assert_eq!(count_sentences("A? B? C!"), 3);
1239    }
1240
1241    // ── JSON-LD tests ───────────────────────────────────────────
1242
1243    #[test]
1244    fn inject_jsonld_adds_article_block() {
1245        let html = "<html><head><title>T</title></head><body></body></html>";
1246        let result = inject_jsonld_description(html, "Test desc");
1247        assert!(result.contains("application/ld+json"));
1248        assert!(result.contains("\"@type\":\"Article\""));
1249        assert!(result.contains("Test desc"));
1250    }
1251
1252    #[test]
1253    fn inject_jsonld_skips_existing() {
1254        let html = r#"<html><head><script type="application/ld+json">{"@type":"Article","description":"Existing"}</script></head></html>"#;
1255        let result = inject_jsonld_description(html, "New desc");
1256        assert!(!result.contains("New desc"));
1257        assert!(result.contains("Existing"));
1258    }
1259
1260    // ── Content audit tests ───────────────────────────────────────
1261
1262    #[test]
1263    fn audit_all_scans_markdown_files() {
1264        let dir = tempfile::tempdir().unwrap();
1265        let content = dir.path().join("content");
1266        fs::create_dir_all(&content).unwrap();
1267
1268        fs::write(
1269            content.join("simple.md"),
1270            "---\ntitle: Simple\n---\nThe cat sat on the mat. It was a good day.",
1271        )
1272        .unwrap();
1273        fs::write(
1274            content.join("complex.md"),
1275            "---\ntitle: Complex\n---\n\
1276             The implementation of sophisticated cryptographic algorithms \
1277             necessitates comprehensive understanding of mathematical \
1278             foundations and computational complexity theory.",
1279        )
1280        .unwrap();
1281
1282        let report = LlmPlugin::audit_all(&content, 8.0).unwrap();
1283        assert_eq!(report.total_files, 2);
1284        assert!(report.failing > 0, "complex.md should fail grade 8");
1285    }
1286
1287    #[test]
1288    fn audit_all_empty_dir() {
1289        let dir = tempfile::tempdir().unwrap();
1290        let content = dir.path().join("empty");
1291        fs::create_dir_all(&content).unwrap();
1292
1293        let report = LlmPlugin::audit_all(&content, 8.0).unwrap();
1294        assert_eq!(report.total_files, 0);
1295        assert_eq!(report.failing, 0);
1296    }
1297
1298    #[test]
1299    fn strip_frontmatter_yaml() {
1300        let input = "---\ntitle: Hello\n---\nBody text here.";
1301        let body = strip_frontmatter(input);
1302        assert!(body.contains("Body text here"));
1303        assert!(!body.contains("title:"));
1304    }
1305
1306    #[test]
1307    fn strip_frontmatter_toml() {
1308        let input = "+++\ntitle = \"Hello\"\n+++\nBody text here.";
1309        let body = strip_frontmatter(input);
1310        assert!(body.contains("Body text here"));
1311        assert!(!body.contains("title"));
1312    }
1313
1314    #[test]
1315    fn strip_frontmatter_none() {
1316        let input = "Just plain content.";
1317        assert_eq!(strip_frontmatter(input), input);
1318    }
1319
1320    #[test]
1321    fn split_frontmatter_preserves_delimiters() {
1322        let input = "---\ntitle: Hello\ndate: 2026-01-01\n---\n\n# Body text";
1323        let (fm, body) = split_frontmatter(input);
1324        assert!(fm.starts_with("---"));
1325        assert!(fm.ends_with("---"));
1326        assert!(fm.contains("title: Hello"));
1327        assert!(body.contains("# Body text"));
1328    }
1329
1330    #[test]
1331    fn split_frontmatter_toml_preserves() {
1332        let input = "+++\ntitle = \"Hello\"\n+++\nBody.";
1333        let (fm, body) = split_frontmatter(input);
1334        assert!(fm.starts_with("+++"));
1335        assert!(body.contains("Body."));
1336    }
1337
1338    #[test]
1339    fn split_frontmatter_no_frontmatter() {
1340        let input = "Just plain content.";
1341        let (fm, body) = split_frontmatter(input);
1342        assert!(fm.is_empty());
1343        assert_eq!(body, input);
1344    }
1345
1346    #[test]
1347    fn audit_and_fix_skips_when_ollama_unavailable() {
1348        let dir = tempfile::tempdir().unwrap();
1349        let content = dir.path().join("content");
1350        fs::create_dir_all(&content).unwrap();
1351        fs::write(content.join("test.md"), "---\ntitle: T\n---\nSimple text.")
1352            .unwrap();
1353
1354        let config = LlmConfig {
1355            endpoint: "http://localhost:99999".to_string(),
1356            ..LlmConfig::default()
1357        };
1358        let result = LlmPlugin::audit_and_fix(&content, &config).unwrap();
1359        assert_eq!(result, 0);
1360    }
1361
1362    #[test]
1363    fn full_repo_readability_audit() {
1364        // Audits ALL Markdown content across the entire repository.
1365        let dirs = [
1366            ("docs/guide", 15.0),
1367            ("examples/basic/content", 10.0),
1368            ("examples/blog/content", 10.0),
1369            ("examples/docs/content", 13.0),
1370            ("examples/landing/content", 10.0),
1371            ("examples/plugins/content", 10.0),
1372            ("examples/portfolio/content", 10.0),
1373            ("examples/quickstart/content", 10.0),
1374            ("examples/content/en", 10.0),
1375        ];
1376
1377        let mut total_files = 0usize;
1378        let mut total_pass = 0usize;
1379        let mut total_fail = 0usize;
1380
1381        println!("\n{}", "=".repeat(60));
1382        println!("  FULL REPOSITORY READABILITY AUDIT");
1383        println!("{}\n", "=".repeat(60));
1384
1385        for (dir, target) in &dirs {
1386            let path = Path::new(dir);
1387            if !path.exists() {
1388                continue;
1389            }
1390
1391            let report = LlmPlugin::audit_all(path, *target).unwrap();
1392            if report.total_files == 0 {
1393                continue;
1394            }
1395
1396            println!("── {dir} (target: grade {target:.0}) ��─");
1397            for r in &report.results {
1398                let status = if r.passes { "PASS" } else { "FAIL" };
1399                println!(
1400                    "  {:.<40} grade {:>5.1}  ease {:>5.1}  [{status}]",
1401                    r.path, r.grade_level, r.reading_ease
1402                );
1403            }
1404            println!("  → {}/{} pass\n", report.passing, report.total_files);
1405
1406            total_files += report.total_files;
1407            total_pass += report.passing;
1408            total_fail += report.failing;
1409        }
1410
1411        println!("{}", "=".repeat(60));
1412        println!(
1413            "  TOTAL: {total_files} files — {total_pass} pass, {total_fail} fail"
1414        );
1415        println!("{}\n", "=".repeat(60));
1416    }
1417
1418    #[test]
1419    fn audit_docs_guide() {
1420        // This test is called by the readability-gate CI workflow.
1421        // It audits all .md files in docs/guide/ against grade 17
1422        // (documentation is technical and includes code blocks which
1423        // inflate Flesch-Kincaid scores).
1424        let guide_dir = Path::new("docs/guide");
1425        if !guide_dir.exists() {
1426            return; // Skip in environments without the guide
1427        }
1428
1429        let report = LlmPlugin::audit_all(guide_dir, 17.0).unwrap();
1430        for result in &report.results {
1431            let status = if result.passes { "PASS" } else { "FAIL" };
1432            println!(
1433                "[readability] {}: grade={:.1}, ease={:.1}, avg_sentence={:.1} — {}",
1434                result.path,
1435                result.grade_level,
1436                result.reading_ease,
1437                result.avg_sentence_len,
1438                status
1439            );
1440        }
1441
1442        println!(
1443            "\n[readability] {}/{} files pass (target: grade {:.0})",
1444            report.passing, report.total_files, report.target_grade
1445        );
1446    }
1447
1448    // ── Coverage gap tests ────────────────────────────────────────
1449
1450    #[test]
1451    fn is_ollama_available_unreachable() {
1452        assert!(!is_ollama_available("http://localhost:99999"));
1453    }
1454
1455    #[test]
1456    fn call_ollama_unreachable_returns_none() {
1457        assert!(call_ollama("http://localhost:99999", "llama3", "hi").is_none());
1458    }
1459
1460    #[test]
1461    fn needs_meta_description_with_content_attr_first() {
1462        // content= before name= (different ordering)
1463        let html = r#"<meta content="Decent length description that is more than fifty characters long enough" name="description">"#;
1464        // name="description" is present so returns false-ish check
1465        assert!(!needs_meta_description(html));
1466    }
1467
1468    #[test]
1469    fn inject_meta_description_no_head() {
1470        let html = "<html><body>No head tag</body></html>";
1471        let result = inject_meta_description(html, "desc");
1472        assert_eq!(result, html); // unchanged
1473    }
1474
1475    #[test]
1476    fn inject_jsonld_no_head() {
1477        let html = "<html><body>No head</body></html>";
1478        let result = inject_jsonld_description(html, "desc");
1479        assert_eq!(result, html);
1480    }
1481
1482    #[test]
1483    fn extract_page_text_no_body() {
1484        let html = "just plain text no tags";
1485        let text = extract_page_text(html, 100);
1486        assert_eq!(text, "just plain text no tags");
1487    }
1488
1489    #[test]
1490    fn extract_page_text_truncates() {
1491        let html = "<body><p>word </p></body>";
1492        let text = extract_page_text(html, 3);
1493        assert!(text.len() <= 5);
1494    }
1495
1496    #[test]
1497    fn generate_missing_alt_text_no_images() {
1498        let mut html = "<html><body><p>No images</p></body></html>".to_string();
1499        let count = generate_missing_alt_text(
1500            &mut html,
1501            "llama3",
1502            "http://localhost:99999",
1503            true,
1504            Path::new("test.html"),
1505            Path::new("."),
1506        );
1507        assert_eq!(count, 0);
1508    }
1509
1510    #[test]
1511    fn readability_audit_single_word() {
1512        let audit = ReadabilityAudit::analyze("Hello");
1513        assert!(audit.grade_level >= 0.0);
1514        assert!(audit.avg_sentence_len >= 0.0);
1515    }
1516
1517    #[test]
1518    fn count_word_syllables_empty() {
1519        assert_eq!(count_word_syllables(""), 1);
1520    }
1521
1522    #[test]
1523    fn count_word_syllables_numbers() {
1524        assert_eq!(count_word_syllables("123"), 1);
1525    }
1526
1527    #[test]
1528    fn split_frontmatter_unclosed() {
1529        let input = "---\ntitle: Hello\nNo closing delimiter";
1530        let (fm, body) = split_frontmatter(input);
1531        assert!(fm.is_empty());
1532        assert_eq!(body, input);
1533    }
1534
1535    #[test]
1536    fn llm_plugin_skips_missing_site_dir() {
1537        let plugin = LlmPlugin::new(LlmConfig::default());
1538        let ctx = PluginContext::new(
1539            Path::new("/tmp/c"),
1540            Path::new("/tmp/b"),
1541            Path::new("/nonexistent/site"),
1542            Path::new("/tmp/t"),
1543        );
1544        assert!(plugin.after_compile(&ctx).is_ok());
1545    }
1546
1547    #[test]
1548    fn config_defaults_readability() {
1549        let config = LlmConfig::default();
1550        assert!((config.target_grade - 8.0).abs() < f64::EPSILON);
1551        assert_eq!(config.max_refinement_attempts, 1);
1552    }
1553
1554    #[test]
1555    fn llm_plugin_skips_when_ollama_unavailable() {
1556        let plugin = LlmPlugin::new(LlmConfig {
1557            endpoint: "http://localhost:99999".to_string(),
1558            ..LlmConfig::default()
1559        });
1560
1561        let dir = tempfile::tempdir().unwrap();
1562        let site = dir.path().join("site");
1563        fs::create_dir_all(&site).unwrap();
1564        fs::write(site.join("index.html"), "<html><body></body></html>")
1565            .unwrap();
1566
1567        let ctx = PluginContext::new(dir.path(), dir.path(), &site, dir.path());
1568        // Should succeed (graceful skip)
1569        plugin.after_compile(&ctx).unwrap();
1570    }
1571
1572    // ── Agentic AI fix pipeline tests ────────────────────────────
1573
1574    #[test]
1575    fn ai_fix_report_serializes_to_json() {
1576        let report = AiFixReport {
1577            total_audited: 10,
1578            total_failing: 3,
1579            total_fixed: 2,
1580            results: vec![
1581                AiFixResult {
1582                    path: "docs/guide.md".to_string(),
1583                    before_grade: 12.5,
1584                    after_grade: 7.2,
1585                    improved: true,
1586                    action: "rewritten".to_string(),
1587                },
1588                AiFixResult {
1589                    path: "docs/api.md".to_string(),
1590                    before_grade: 14.0,
1591                    after_grade: 13.8,
1592                    improved: false,
1593                    action: "no-improvement".to_string(),
1594                },
1595            ],
1596        };
1597        let json = serde_json::to_string(&report).unwrap();
1598        assert!(json.contains("\"total_fixed\":2"));
1599        assert!(json.contains("\"action\":\"rewritten\""));
1600    }
1601
1602    #[test]
1603    fn ai_fix_report_skips_when_ollama_unavailable() {
1604        let dir = tempfile::tempdir().unwrap();
1605        let content = dir.path().join("content");
1606        fs::create_dir_all(&content).unwrap();
1607        fs::write(
1608            content.join("test.md"),
1609            "---\ntitle: T\n---\nThe implementation of sophisticated algorithms.",
1610        )
1611        .unwrap();
1612
1613        let config = LlmConfig {
1614            endpoint: "http://localhost:99999".to_string(),
1615            max_refinement_attempts: 3,
1616            ..LlmConfig::default()
1617        };
1618        let report =
1619            LlmPlugin::audit_and_fix_with_report(&content, &config).unwrap();
1620        assert_eq!(report.total_fixed, 0);
1621        assert!(report.results.is_empty());
1622    }
1623
1624    // ── Multilingual readability tests ──────────────────────────
1625
1626    #[test]
1627    fn formula_from_lang_english() {
1628        assert_eq!(
1629            ReadabilityFormula::from_lang("en"),
1630            Some(ReadabilityFormula::FleschKincaid)
1631        );
1632        assert_eq!(
1633            ReadabilityFormula::from_lang("en-US"),
1634            Some(ReadabilityFormula::FleschKincaid)
1635        );
1636    }
1637
1638    #[test]
1639    fn formula_from_lang_french() {
1640        assert_eq!(
1641            ReadabilityFormula::from_lang("fr"),
1642            Some(ReadabilityFormula::KandelMoles)
1643        );
1644        assert_eq!(
1645            ReadabilityFormula::from_lang("fr-CA"),
1646            Some(ReadabilityFormula::KandelMoles)
1647        );
1648    }
1649
1650    #[test]
1651    fn formula_from_lang_german() {
1652        assert_eq!(
1653            ReadabilityFormula::from_lang("de"),
1654            Some(ReadabilityFormula::WienerSachtextformel)
1655        );
1656        assert_eq!(
1657            ReadabilityFormula::from_lang("de-AT"),
1658            Some(ReadabilityFormula::WienerSachtextformel)
1659        );
1660    }
1661
1662    #[test]
1663    fn formula_from_lang_italian() {
1664        assert_eq!(
1665            ReadabilityFormula::from_lang("it"),
1666            Some(ReadabilityFormula::Gulpease)
1667        );
1668    }
1669
1670    #[test]
1671    fn formula_from_lang_swedish() {
1672        assert_eq!(
1673            ReadabilityFormula::from_lang("sv"),
1674            Some(ReadabilityFormula::Lix)
1675        );
1676        assert_eq!(
1677            ReadabilityFormula::from_lang("nb"),
1678            Some(ReadabilityFormula::Lix)
1679        );
1680        assert_eq!(
1681            ReadabilityFormula::from_lang("da"),
1682            Some(ReadabilityFormula::Lix)
1683        );
1684    }
1685
1686    #[test]
1687    fn formula_from_lang_spanish() {
1688        assert_eq!(
1689            ReadabilityFormula::from_lang("es"),
1690            Some(ReadabilityFormula::FernandezHuerta)
1691        );
1692    }
1693
1694    #[test]
1695    fn formula_from_lang_unsupported() {
1696        assert_eq!(ReadabilityFormula::from_lang("ja"), None);
1697        assert_eq!(ReadabilityFormula::from_lang("zh"), None);
1698        assert_eq!(ReadabilityFormula::from_lang("ar"), None);
1699    }
1700
1701    #[test]
1702    fn kandel_moles_simple_french() {
1703        let text = "Le chat est sur le tapis. Il fait beau. Le soleil brille.";
1704        let audit = ReadabilityAudit::analyze_with_lang(text, "fr");
1705        assert!(
1706            audit.reading_ease > 50.0,
1707            "Simple French should be readable, got {:.1}",
1708            audit.reading_ease
1709        );
1710    }
1711
1712    #[test]
1713    fn wiener_simple_german() {
1714        let text = "Die Katze sitzt auf der Matte. Es ist ein guter Tag. Die Sonne scheint.";
1715        let audit = ReadabilityAudit::analyze_with_lang(text, "de");
1716        assert!(
1717            audit.grade_level < 15.0,
1718            "Simple German got grade {:.1}",
1719            audit.grade_level
1720        );
1721    }
1722
1723    #[test]
1724    fn gulpease_simple_italian() {
1725        let text = "Il gatto si siede sul tappeto. Il sole splende. Oggi è una bella giornata.";
1726        let audit = ReadabilityAudit::analyze_with_lang(text, "it");
1727        assert!(
1728            audit.reading_ease > 40.0,
1729            "Simple Italian got ease {:.1}",
1730            audit.reading_ease
1731        );
1732    }
1733
1734    #[test]
1735    fn lix_simple_swedish() {
1736        let text = "Katten sitter på mattan. Solen skiner. Det är en fin dag.";
1737        let audit = ReadabilityAudit::analyze_with_lang(text, "sv");
1738        assert!(audit.grade_level >= 0.0);
1739        assert!(audit.reading_ease > 0.0);
1740    }
1741
1742    #[test]
1743    fn fernandez_huerta_simple_spanish() {
1744        let text = "El gato está en la mesa. El sol brilla. Es un buen día.";
1745        let audit = ReadabilityAudit::analyze_with_lang(text, "es");
1746        assert!(
1747            audit.reading_ease > 50.0,
1748            "Simple Spanish got ease {:.1}",
1749            audit.reading_ease
1750        );
1751    }
1752
1753    #[test]
1754    fn analyze_with_lang_empty_defaults_to_english() {
1755        let text = "The cat sat on the mat.";
1756        let a = ReadabilityAudit::analyze(text);
1757        let b = ReadabilityAudit::analyze_with_lang(text, "");
1758        assert!((a.grade_level - b.grade_level).abs() < f64::EPSILON);
1759    }
1760
1761    #[test]
1762    fn analyze_with_lang_unsupported_falls_back() {
1763        let text = "The cat sat on the mat.";
1764        let a = ReadabilityAudit::analyze(text);
1765        let b = ReadabilityAudit::analyze_with_lang(text, "ja");
1766        assert!((a.grade_level - b.grade_level).abs() < f64::EPSILON);
1767    }
1768
1769    #[test]
1770    fn extract_frontmatter_lang_yaml() {
1771        let content = "---\ntitle: Hello\nlanguage: fr\n---\nBody.";
1772        assert_eq!(extract_frontmatter_lang(content), "fr");
1773    }
1774
1775    #[test]
1776    fn extract_frontmatter_lang_yaml_short() {
1777        let content = "---\ntitle: Hello\nlang: de\n---\nBody.";
1778        assert_eq!(extract_frontmatter_lang(content), "de");
1779    }
1780
1781    #[test]
1782    fn extract_frontmatter_lang_toml() {
1783        let content = "+++\ntitle = \"Hello\"\nlanguage = \"it\"\n+++\nBody.";
1784        assert_eq!(extract_frontmatter_lang(content), "it");
1785    }
1786
1787    #[test]
1788    fn extract_frontmatter_lang_missing() {
1789        let content = "---\ntitle: Hello\n---\nBody.";
1790        assert_eq!(extract_frontmatter_lang(content), "");
1791    }
1792
1793    #[test]
1794    fn extract_frontmatter_lang_no_frontmatter() {
1795        let content = "Just plain text.";
1796        assert_eq!(extract_frontmatter_lang(content), "");
1797    }
1798
1799    #[test]
1800    fn audit_all_respects_language() {
1801        let dir = tempfile::tempdir().unwrap();
1802        let content = dir.path().join("content");
1803        fs::create_dir_all(&content).unwrap();
1804
1805        fs::write(
1806            content.join("french.md"),
1807            "---\ntitle: Bonjour\nlanguage: fr\n---\nLe chat est sur le tapis. Il fait beau.",
1808        )
1809        .unwrap();
1810
1811        let report = LlmPlugin::audit_all(&content, 8.0).unwrap();
1812        assert_eq!(report.total_files, 1);
1813        // Should use Kandel-Moles, not Flesch-Kincaid
1814    }
1815
1816    // ── Multilingual formulas: empty text ────────────────────────
1817
1818    #[test]
1819    fn kandel_moles_empty_text() {
1820        let audit = ReadabilityAudit::analyze_with_lang("", "fr");
1821        assert!(audit.grade_level.abs() < f64::EPSILON);
1822        assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
1823        assert!(audit.avg_sentence_len.abs() < f64::EPSILON);
1824    }
1825
1826    #[test]
1827    fn wiener_empty_text() {
1828        let audit = ReadabilityAudit::analyze_with_lang("", "de");
1829        assert!(audit.grade_level.abs() < f64::EPSILON);
1830        assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
1831    }
1832
1833    #[test]
1834    fn gulpease_empty_text() {
1835        let audit = ReadabilityAudit::analyze_with_lang("", "it");
1836        assert!(audit.grade_level.abs() < f64::EPSILON);
1837        assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
1838    }
1839
1840    #[test]
1841    fn lix_empty_text() {
1842        let audit = ReadabilityAudit::analyze_with_lang("", "sv");
1843        assert!(audit.grade_level.abs() < f64::EPSILON);
1844        assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
1845    }
1846
1847    #[test]
1848    fn fernandez_huerta_empty_text() {
1849        let audit = ReadabilityAudit::analyze_with_lang("", "es");
1850        assert!(audit.grade_level.abs() < f64::EPSILON);
1851        assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
1852    }
1853
1854    // ── Multilingual formulas: single-word text ──────────────────
1855
1856    #[test]
1857    fn kandel_moles_single_word() {
1858        let audit = ReadabilityAudit::analyze_with_lang("Bonjour", "fr");
1859        assert!(audit.grade_level >= 0.0);
1860        assert!(audit.reading_ease >= 0.0);
1861        assert!(audit.avg_sentence_len >= 1.0);
1862    }
1863
1864    #[test]
1865    fn wiener_single_word() {
1866        let audit = ReadabilityAudit::analyze_with_lang("Hallo", "de");
1867        assert!(audit.grade_level >= 0.0);
1868        assert!(audit.avg_sentence_len >= 1.0);
1869    }
1870
1871    #[test]
1872    fn gulpease_single_word() {
1873        let audit = ReadabilityAudit::analyze_with_lang("Ciao", "it");
1874        assert!(audit.grade_level >= 0.0);
1875        assert!(audit.avg_sentence_len >= 1.0);
1876    }
1877
1878    #[test]
1879    fn lix_single_word() {
1880        let audit = ReadabilityAudit::analyze_with_lang("Hej", "sv");
1881        assert!(audit.grade_level >= 0.0);
1882    }
1883
1884    #[test]
1885    fn fernandez_huerta_single_word() {
1886        let audit = ReadabilityAudit::analyze_with_lang("Hola", "es");
1887        assert!(audit.grade_level >= 0.0);
1888    }
1889
1890    // ── Multilingual formulas: long text ─────────────────────────
1891
1892    #[test]
1893    fn kandel_moles_long_text() {
1894        let text = "Le développement de nouvelles infrastructures \
1895                    technologiques nécessite une compréhension \
1896                    approfondie des systèmes complexes. \
1897                    Les algorithmes sophistiqués démontrent \
1898                    une efficacité considérable. \
1899                    La modernisation progressive des architectures \
1900                    informatiques représente un défi majeur.";
1901        let audit = ReadabilityAudit::analyze_with_lang(text, "fr");
1902        assert!(audit.grade_level > 0.0);
1903        assert!(audit.reading_ease >= 0.0);
1904        assert!(audit.avg_sentence_len > 1.0);
1905    }
1906
1907    #[test]
1908    fn wiener_long_text() {
1909        let text = "Die Implementierung fortschrittlicher kryptografischer \
1910                    Algorithmen erfordert umfassendes Verständnis \
1911                    mathematischer Grundlagen. Asymmetrische \
1912                    Verschlüsselungsprotokolle weisen erheblichen \
1913                    Rechenaufwand auf. Die systematische Optimierung \
1914                    komplexer Datenstrukturen bleibt herausfordernd.";
1915        let audit = ReadabilityAudit::analyze_with_lang(text, "de");
1916        assert!(audit.grade_level > 0.0);
1917        assert!(audit.avg_sentence_len > 1.0);
1918    }
1919
1920    #[test]
1921    fn gulpease_long_text() {
1922        let text = "L'implementazione di algoritmi crittografici sofisticati \
1923                    richiede una comprensione approfondita dei fondamenti \
1924                    matematici. I protocolli di crittografia asimmetrica \
1925                    dimostrano un considerevole sovraccarico computazionale. \
1926                    L'ottimizzazione sistematica delle strutture dati \
1927                    complesse rimane impegnativa.";
1928        let audit = ReadabilityAudit::analyze_with_lang(text, "it");
1929        assert!(audit.grade_level > 0.0);
1930        assert!(audit.avg_sentence_len > 1.0);
1931    }
1932
1933    #[test]
1934    fn lix_long_text() {
1935        let text = "Implementeringen av avancerade kryptografiska algoritmer \
1936                    kräver omfattande förståelse av matematiska grunder. \
1937                    Asymmetriska krypteringsprotokoll uppvisar betydande \
1938                    beräkningsbelastning. Systematisk optimering av komplexa \
1939                    datastrukturer förblir utmanande.";
1940        let audit = ReadabilityAudit::analyze_with_lang(text, "sv");
1941        assert!(audit.grade_level > 0.0);
1942        assert!(audit.avg_sentence_len > 1.0);
1943    }
1944
1945    #[test]
1946    fn fernandez_huerta_long_text() {
1947        let text =
1948            "La implementación de algoritmos criptográficos sofisticados \
1949                    requiere una comprensión profunda de los fundamentos \
1950                    matemáticos. Los protocolos de cifrado asimétrico \
1951                    demuestran una considerable sobrecarga computacional. \
1952                    La optimización sistemática de estructuras de datos \
1953                    complejas sigue siendo un desafío.";
1954        let audit = ReadabilityAudit::analyze_with_lang(text, "es");
1955        assert!(audit.grade_level > 0.0);
1956        assert!(audit.avg_sentence_len > 1.0);
1957    }
1958
1959    // ── WienerSachtextformel: varying syllable counts ────────────
1960
1961    #[test]
1962    fn wiener_mixed_syllable_words() {
1963        // Mix of 1-syllable, 2-syllable, 3+ syllable words
1964        let text = "Ich bin gut. Das Haus ist sehr interessant. \
1965                    Die Universität hat viele Studenten.";
1966        let audit = ReadabilityAudit::analyze_with_lang(text, "de");
1967        assert!(audit.grade_level >= 0.0);
1968        assert!(audit.reading_ease >= 0.0);
1969        assert!(audit.reading_ease <= 100.0);
1970    }
1971
1972    // ── LIX: varying character lengths ───────────────────────────
1973
1974    #[test]
1975    fn lix_mixed_word_lengths() {
1976        // Short words and long words (>6 chars) to exercise long-word filter
1977        let text = "En bok om programmering. \
1978                    Datavetenskapliga beräkningar kräver noggrannhet.";
1979        let audit = ReadabilityAudit::analyze_with_lang(text, "sv");
1980        assert!(audit.grade_level > 0.0);
1981        assert!(audit.reading_ease >= 0.0);
1982        assert!(audit.reading_ease <= 100.0);
1983    }
1984
1985    // ── extract_frontmatter_lang() edge cases ────────────────────
1986
1987    #[test]
1988    fn extract_frontmatter_lang_toml_with_quotes() {
1989        let content =
1990            "+++\ntitle = \"Hello\"\nlanguage = \"en-US\"\n+++\nBody.";
1991        assert_eq!(extract_frontmatter_lang(content), "en-US");
1992    }
1993
1994    #[test]
1995    fn extract_frontmatter_lang_first_wins() {
1996        // language appears before lang — first one should win
1997        let content = "---\nlanguage: fr\nlang: de\n---\nBody.";
1998        assert_eq!(extract_frontmatter_lang(content), "fr");
1999    }
2000
2001    #[test]
2002    fn extract_frontmatter_lang_whitespace_around_value() {
2003        let content = "---\nlanguage:   es  \n---\nBody.";
2004        assert_eq!(extract_frontmatter_lang(content), "es");
2005    }
2006
2007    #[test]
2008    fn extract_frontmatter_lang_yaml_quoted_value() {
2009        let content = "---\nlanguage: \"de\"\n---\nBody.";
2010        assert_eq!(extract_frontmatter_lang(content), "de");
2011    }
2012
2013    #[test]
2014    fn extract_frontmatter_lang_single_quoted() {
2015        let content = "---\nlanguage: 'it'\n---\nBody.";
2016        assert_eq!(extract_frontmatter_lang(content), "it");
2017    }
2018
2019    #[test]
2020    fn extract_frontmatter_lang_empty_value() {
2021        let content = "---\nlanguage: \n---\nBody.";
2022        assert_eq!(extract_frontmatter_lang(content), "");
2023    }
2024
2025    #[test]
2026    fn extract_frontmatter_lang_toml_lang_key() {
2027        let content = "+++\nlang = \"sv\"\n+++\nBody.";
2028        assert_eq!(extract_frontmatter_lang(content), "sv");
2029    }
2030
2031    // ── audit_and_fix_with_report edge cases ─────────────────────
2032
2033    #[test]
2034    fn audit_and_fix_with_report_all_passing() {
2035        let dir = tempfile::tempdir().unwrap();
2036        let content = dir.path().join("content");
2037        fs::create_dir_all(&content).unwrap();
2038
2039        // Very simple text that passes any reasonable threshold
2040        fs::write(
2041            content.join("simple.md"),
2042            "---\ntitle: Simple\n---\nThe cat sat. It was good.",
2043        )
2044        .unwrap();
2045
2046        // Use a high target so everything passes
2047        let config = LlmConfig {
2048            endpoint: "http://localhost:99999".to_string(),
2049            target_grade: 20.0,
2050            ..LlmConfig::default()
2051        };
2052        let report =
2053            LlmPlugin::audit_and_fix_with_report(&content, &config).unwrap();
2054        // Ollama unreachable => empty report, but test the path
2055        assert_eq!(report.total_fixed, 0);
2056    }
2057
2058    #[test]
2059    fn audit_and_fix_with_report_empty_dir() {
2060        let dir = tempfile::tempdir().unwrap();
2061        let content = dir.path().join("empty_content");
2062        fs::create_dir_all(&content).unwrap();
2063
2064        let config = LlmConfig {
2065            endpoint: "http://localhost:99999".to_string(),
2066            ..LlmConfig::default()
2067        };
2068        let report =
2069            LlmPlugin::audit_and_fix_with_report(&content, &config).unwrap();
2070        assert_eq!(report.total_audited, 0);
2071        assert_eq!(report.total_failing, 0);
2072        assert!(report.results.is_empty());
2073    }
2074
2075    #[test]
2076    fn audit_all_file_with_empty_body() {
2077        let dir = tempfile::tempdir().unwrap();
2078        let content = dir.path().join("content");
2079        fs::create_dir_all(&content).unwrap();
2080
2081        fs::write(content.join("empty_body.md"), "---\ntitle: T\n---\n")
2082            .unwrap();
2083
2084        let report = LlmPlugin::audit_all(&content, 8.0).unwrap();
2085        assert_eq!(report.total_files, 1);
2086        // Empty body => grade 0, passes any threshold
2087        assert_eq!(report.passing, 1);
2088    }
2089
2090    // ── needs_meta_description edge cases ────────────────────────
2091
2092    #[test]
2093    fn needs_meta_description_no_content_attr() {
2094        // Has name="description" but no content attribute
2095        let html = r#"<meta name="description">"#;
2096        // name="description" is found, but content= search fails,
2097        // so falls through to the !html.contains check which is false
2098        assert!(!needs_meta_description(html));
2099    }
2100
2101    #[test]
2102    fn needs_meta_description_multiple_meta_tags() {
2103        let html = r#"<meta name="author" content="Alice"><meta name="description" content="This is a sufficiently long description that is more than fifty characters long">"#;
2104        assert!(!needs_meta_description(html));
2105    }
2106
2107    #[test]
2108    fn needs_meta_description_empty_content() {
2109        let html = r#"<meta name="description" content="">"#;
2110        assert!(needs_meta_description(html));
2111    }
2112
2113    // ── inject_meta_description with special chars ───────────────
2114
2115    #[test]
2116    fn inject_meta_description_escapes_ampersand() {
2117        let html = "<html><head></head><body></body></html>";
2118        let result = inject_meta_description(html, "Tom & Jerry");
2119        assert!(result.contains("Tom &amp; Jerry"));
2120    }
2121
2122    #[test]
2123    fn inject_meta_description_escapes_quotes() {
2124        let html = "<html><head></head><body></body></html>";
2125        let result = inject_meta_description(html, r#"A "great" page"#);
2126        assert!(result.contains("A &quot;great&quot; page"));
2127    }
2128
2129    #[test]
2130    fn inject_meta_description_escapes_angle_brackets() {
2131        let html = "<html><head></head><body></body></html>";
2132        let result = inject_meta_description(html, "x < y");
2133        assert!(result.contains("x &lt; y"));
2134    }
2135
2136    #[test]
2137    fn inject_meta_description_all_special_chars() {
2138        let html = "<html><head></head><body></body></html>";
2139        let result = inject_meta_description(html, r#"A & B "C" <D>"#);
2140        // The function escapes &, ", < but not > (only the dangerous chars in attribute context)
2141        assert!(result.contains("A &amp; B &quot;C&quot; &lt;D>"));
2142    }
2143
2144    // ── extract_page_text edge cases ─────────────────────────────
2145
2146    #[test]
2147    fn extract_page_text_with_main_tag() {
2148        let html = "<html><body><div>ignored</div><main><p>Main content here.</p></main></body></html>";
2149        let text = extract_page_text(html, 500);
2150        assert!(text.contains("Main content here"));
2151        // "ignored" is before <main>, so it should not appear
2152        assert!(!text.contains("ignored"));
2153    }
2154
2155    #[test]
2156    fn extract_page_text_large_truncated() {
2157        let long_body = "word ".repeat(200);
2158        let html = format!("<body><p>{long_body}</p></body>");
2159        let text = extract_page_text(&html, 50);
2160        // Should be truncated well under the full 1000-char body
2161        assert!(text.len() <= 60);
2162    }
2163
2164    #[test]
2165    fn extract_page_text_strips_control_chars() {
2166        let html = "<body>Hello\x00\x01World</body>";
2167        let text = extract_page_text(html, 100);
2168        assert_eq!(text, "HelloWorld");
2169    }
2170
2171    #[test]
2172    fn extract_page_text_nested_tags() {
2173        let html = "<body><div><span>A</span> <em>B</em></div></body>";
2174        let text = extract_page_text(html, 100);
2175        assert!(text.contains('A'));
2176        assert!(text.contains('B'));
2177    }
2178
2179    // ── generate_missing_alt_text edge cases ─────────────────────
2180
2181    #[test]
2182    fn generate_missing_alt_text_empty_alt() {
2183        let mut html =
2184            r#"<html><body><img src="photo.jpg" alt=""></body></html>"#
2185                .to_string();
2186        // Ollama unreachable, so count stays 0, but exercises the tag detection
2187        let count = generate_missing_alt_text(
2188            &mut html,
2189            "llama3",
2190            "http://localhost:99999",
2191            false,
2192            Path::new("test.html"),
2193            Path::new("."),
2194        );
2195        // Can't generate without Ollama, but exercises alt="" detection path
2196        assert_eq!(count, 0);
2197    }
2198
2199    #[test]
2200    fn generate_missing_alt_text_missing_closing_bracket() {
2201        let mut html =
2202            "<html><body><img src=\"photo.jpg\"</body></html>".to_string();
2203        let count = generate_missing_alt_text(
2204            &mut html,
2205            "llama3",
2206            "http://localhost:99999",
2207            false,
2208            Path::new("test.html"),
2209            Path::new("."),
2210        );
2211        assert_eq!(count, 0);
2212    }
2213
2214    #[test]
2215    fn generate_missing_alt_text_mixed_images() {
2216        let mut html = r#"<html><body>
2217            <img src="a.jpg" alt="Good alt">
2218            <img src="b.jpg">
2219            <img src="c.jpg" alt="">
2220        </body></html>"#
2221            .to_string();
2222        // Exercises the loop: first image has alt (skipped),
2223        // second has no alt, third has empty alt.
2224        // Ollama unreachable so no actual generation.
2225        let count = generate_missing_alt_text(
2226            &mut html,
2227            "llama3",
2228            "http://localhost:99999",
2229            true,
2230            Path::new("test.html"),
2231            Path::new("."),
2232        );
2233        assert_eq!(count, 0);
2234    }
2235
2236    #[test]
2237    fn generate_missing_alt_text_with_alt_present() {
2238        let mut html =
2239            r#"<html><body><img src="x.jpg" alt="Has alt text"></body></html>"#
2240                .to_string();
2241        let count = generate_missing_alt_text(
2242            &mut html,
2243            "llama3",
2244            "http://localhost:99999",
2245            false,
2246            Path::new("test.html"),
2247            Path::new("."),
2248        );
2249        assert_eq!(count, 0);
2250    }
2251
2252    // ── ReadabilityFormula edge cases ─────────────────────────────
2253
2254    #[test]
2255    fn formula_from_lang_underscore_separator() {
2256        assert_eq!(
2257            ReadabilityFormula::from_lang("en_US"),
2258            Some(ReadabilityFormula::FleschKincaid)
2259        );
2260        assert_eq!(
2261            ReadabilityFormula::from_lang("de_DE"),
2262            Some(ReadabilityFormula::WienerSachtextformel)
2263        );
2264    }
2265
2266    #[test]
2267    fn formula_from_lang_norwegian_variants() {
2268        assert_eq!(
2269            ReadabilityFormula::from_lang("nn"),
2270            Some(ReadabilityFormula::Lix)
2271        );
2272        assert_eq!(
2273            ReadabilityFormula::from_lang("no"),
2274            Some(ReadabilityFormula::Lix)
2275        );
2276    }
2277
2278    // ── LlmConfig / LlmPlugin additional coverage ───────────────
2279
2280    #[test]
2281    fn llm_config_default_values() {
2282        let config = LlmConfig::default();
2283        assert_eq!(config.model, "llama3");
2284        assert_eq!(config.endpoint, "http://localhost:11434");
2285        assert!(!config.dry_run);
2286    }
2287
2288    #[test]
2289    fn llm_plugin_debug_impl() {
2290        let plugin = LlmPlugin::new(LlmConfig::default());
2291        let debug = format!("{plugin:?}");
2292        assert!(debug.contains("LlmPlugin"));
2293        assert!(debug.contains("llama3"));
2294    }
2295
2296    // ── split_frontmatter edge cases ─────────────────────────────
2297
2298    #[test]
2299    fn split_frontmatter_leading_whitespace() {
2300        let input = "  ---\ntitle: Hello\n---\nBody.";
2301        let (fm, body) = split_frontmatter(input);
2302        assert!(fm.contains("title: Hello"));
2303        assert!(body.contains("Body."));
2304    }
2305
2306    #[test]
2307    fn split_frontmatter_toml_unclosed() {
2308        let input = "+++\ntitle = \"Hello\"\nNo closing delimiter";
2309        let (fm, body) = split_frontmatter(input);
2310        assert!(fm.is_empty());
2311        assert_eq!(body, input);
2312    }
2313
2314    // ── FileAuditResult / AuditReport serialization ──────────────
2315
2316    #[test]
2317    fn file_audit_result_serializes() {
2318        let result = FileAuditResult {
2319            path: "test.md".to_string(),
2320            grade_level: 7.5,
2321            reading_ease: 65.0,
2322            avg_sentence_len: 12.0,
2323            passes: true,
2324        };
2325        let json = serde_json::to_string(&result).unwrap();
2326        assert!(json.contains("\"path\":\"test.md\""));
2327        assert!(json.contains("\"passes\":true"));
2328    }
2329
2330    #[test]
2331    fn audit_report_serializes() {
2332        let report = AuditReport {
2333            target_grade: 8.0,
2334            total_files: 2,
2335            passing: 1,
2336            failing: 1,
2337            results: vec![],
2338        };
2339        let json = serde_json::to_string(&report).unwrap();
2340        assert!(json.contains("\"target_grade\":8.0"));
2341        assert!(json.contains("\"total_files\":2"));
2342    }
2343
2344    // ── inject_jsonld_description edge cases ─────────────────────
2345
2346    #[test]
2347    fn inject_jsonld_with_special_chars() {
2348        let html = "<html><head></head><body></body></html>";
2349        let result = inject_jsonld_description(html, "Tom & Jerry's \"show\"");
2350        assert!(result.contains("application/ld+json"));
2351        assert!(result.contains("Tom & Jerry"));
2352    }
2353
2354    // ── count_syllables edge cases ───────────────────────────────
2355
2356    #[test]
2357    fn count_syllables_multiple_vowel_groups() {
2358        // "beautiful" has vowel groups: eau-i-u => 3 groups, minus silent e = stays
2359        assert!(count_word_syllables("beautiful") >= 2);
2360    }
2361
2362    #[test]
2363    fn count_syllables_consecutive_vowels() {
2364        // "queue" => qu-eu-e: vowel groups = 2, minus trailing e = 1
2365        assert_eq!(count_word_syllables("queue"), 1);
2366    }
2367
2368    #[test]
2369    fn count_syllables_all_consonants() {
2370        // "rhythm" => y is a vowel => 1 vowel group
2371        assert_eq!(count_word_syllables("rhythm"), 1);
2372    }
2373
2374    #[test]
2375    fn count_syllables_text_total() {
2376        let total = count_syllables("The cat sat on the mat.");
2377        assert!(total >= 6); // 6 monosyllabic words
2378    }
2379
2380    #[test]
2381    fn count_words_basic() {
2382        assert_eq!(count_words("one two three"), 3);
2383        assert_eq!(count_words(""), 0);
2384        assert_eq!(count_words("   "), 0);
2385        assert_eq!(count_words("single"), 1);
2386    }
2387
2388    // ── Readability: numeric edge cases ──────────────────────────
2389
2390    #[test]
2391    fn readability_grade_never_negative() {
2392        // Single short word => formula could produce negative, clamped to 0
2393        let audit = ReadabilityAudit::analyze("Hi.");
2394        assert!(audit.grade_level >= 0.0);
2395        assert!(audit.reading_ease >= 0.0);
2396        assert!(audit.reading_ease <= 100.0);
2397    }
2398
2399    #[test]
2400    fn readability_ease_clamped_to_100() {
2401        // Very simple text should not exceed 100
2402        let audit = ReadabilityAudit::analyze("Go. Do. Be.");
2403        assert!(audit.reading_ease <= 100.0);
2404        assert!(audit.reading_ease >= 0.0);
2405    }
2406}
ssg/llm.rs

ssg/
llm.rs