Skip to main content

ssg/
search.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Client-side search index generator.
5//!
6//! Generates a JSON search index and injects a search UI into HTML pages,
7//! providing instant full-text search without any server or external service.
8//!
9//! # How it works
10//!
11//! 1. At build time, `SearchIndex` scans all HTML files in the site directory.
12//! 2. It extracts the page title, URL, headings, and body text.
13//! 3. It writes a `search-index.json` file to the site root.
//! 4. The `SearchPlugin` injects a `<script>` tag and search UI into every
//!    HTML page that loads the index and performs client-side,
//!    case-insensitive substring matching.
16//!
17//! The search UI is a modal overlay activated by `Ctrl+K` / `Cmd+K`.
18
19use crate::plugin::{Plugin, PluginContext};
20use anyhow::{Context, Result};
21use rayon::prelude::*;
22use serde::{Deserialize, Serialize};
23use std::fs;
24use std::path::{Path, PathBuf};
25
/// A single entry in the search index.
///
/// One entry is produced for each HTML file processed by
/// [`SearchIndex::build`] and is serialized as one element of the `entries`
/// array in `search-index.json`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SearchEntry {
    /// Page title extracted from `<title>` or, failing that, the first
    /// `<h1>`. Empty when the page has neither.
    pub title: String,
    /// Root-relative URL path (e.g., `/about/index.html`): the file's path
    /// below the site directory, with `\` normalised to `/` and a leading
    /// `/` prepended.
    pub url: String,
    /// Plain-text body content, truncated to `MAX_CONTENT_LENGTH` characters.
    pub content: String,
    /// Section headings (`<h1>`–`<h6>`) found on the page, tags stripped.
    pub headings: Vec<String>,
}
38
/// Maximum content length per page in the search index, counted in
/// characters (Unicode scalar values), not bytes.
/// Keeps the index compact for fast client-side loading.
pub const MAX_CONTENT_LENGTH: usize = 5_000;

/// Maximum number of pages to index.
///
/// Used both as the bound passed to the directory walk and as a hard cap on
/// the collected file list; files beyond the cap are not indexed.
pub const MAX_INDEX_ENTRIES: usize = 50_000;
45
/// The complete search index written to `search-index.json`.
///
/// Serializes as a JSON object with a single `entries` array, which is the
/// shape the injected widget script reads (`d.entries`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SearchIndex {
    /// All indexed pages, one [`SearchEntry`] per HTML file.
    pub entries: Vec<SearchEntry>,
}
52
53impl SearchIndex {
54    /// Build a search index from all HTML files in `site_dir`.
55    ///
56    /// Walks the directory recursively, extracts content from each
57    /// `.html` file, and returns the populated index.
58    pub fn build(site_dir: &Path) -> Result<Self> {
59        let html_files = collect_html_files(site_dir)?;
60        let capped: Vec<_> =
61            html_files.into_iter().take(MAX_INDEX_ENTRIES).collect();
62
63        let entries: Vec<SearchEntry> = capped
64            .par_iter()
65            .map(|path| -> Result<SearchEntry> {
66                let html = fs::read_to_string(path).with_context(|| {
67                    format!("cannot read {}", path.display())
68                })?;
69
70                let rel_url = path
71                    .strip_prefix(site_dir)
72                    .unwrap_or(path)
73                    .to_string_lossy()
74                    .replace('\\', "/");
75
76                let title = extract_title(&html);
77                let headings = extract_headings(&html);
78                let content = extract_text(&html);
79
80                Ok(SearchEntry {
81                    title,
82                    url: format!("/{rel_url}"),
83                    content: truncate(&content, MAX_CONTENT_LENGTH),
84                    headings,
85                })
86            })
87            .collect::<Result<Vec<_>>>()?;
88
89        Ok(Self { entries })
90    }
91
92    /// Write the index to `search-index.json` in the given directory.
93    pub fn write(&self, site_dir: &Path) -> Result<()> {
94        let json = serde_json::to_string(self)
95            .context("failed to serialize search index")?;
96        let path = site_dir.join("search-index.json");
97        fs::write(&path, json)
98            .with_context(|| format!("cannot write {}", path.display()))?;
99        Ok(())
100    }
101
102    /// Number of indexed pages.
103    #[must_use]
104    pub const fn len(&self) -> usize {
105        self.entries.len()
106    }
107
108    /// Returns true if the index has no entries.
109    #[must_use]
110    pub const fn is_empty(&self) -> bool {
111        self.entries.is_empty()
112    }
113}
114
/// Localizable strings shown in the search widget UI.
///
/// All fields are plain text. They are HTML-escaped when substituted into
/// attributes/text and JS-escaped when substituted into the inline script
/// (for the "no results" message). Build a value with one of the bundled
/// constructors ([`SearchLabels::english`], [`SearchLabels::french`],
/// [`SearchLabels::for_locale`]) or construct your own for any locale.
///
/// Each field fills one `{{SSG_…}}` placeholder of the widget template.
#[derive(Debug, Clone)]
pub struct SearchLabels {
    /// Visible text on the trigger button (e.g. "Search").
    /// Fills `{{SSG_BTN_TEXT}}`.
    pub button_text: String,
    /// `aria-label` of the trigger button. Fills `{{SSG_BTN_ARIA}}`.
    pub button_aria: String,
    /// `aria-label` of the modal dialog. Fills `{{SSG_MODAL_ARIA}}`.
    pub modal_aria: String,
    /// Placeholder text inside the input field.
    /// Fills `{{SSG_INPUT_PLACEHOLDER}}`.
    pub input_placeholder: String,
    /// `aria-label` of the input field. Fills `{{SSG_INPUT_ARIA}}`.
    pub input_aria: String,
    /// Footer hint text shown next to the `Esc` key.
    /// Fills `{{SSG_FOOTER_CLOSE}}`.
    pub footer_close: String,
    /// Footer hint text shown next to the up/down arrow keys.
    /// Fills `{{SSG_FOOTER_NAVIGATE}}`.
    pub footer_navigate: String,
    /// Footer hint text shown next to the `Enter` key.
    /// Fills `{{SSG_FOOTER_OPEN}}`.
    pub footer_open: String,
    /// Message shown when a query has no matches. The literal `{query}`
    /// is replaced with the typed query at runtime.
    /// Fills `{{SSG_NO_RESULTS}}`.
    pub no_results: String,
}
144
/// Compact per-locale strings used by [`SearchLabels::for_locale`].
///
/// Holds fewer strings than [`SearchLabels`]: `for_locale` reuses `button`
/// for the button text and for all three `aria-label` values.
struct LocaleEntry {
    /// Trigger-button text; also reused for the button, modal and input
    /// `aria-label`s.
    button: &'static str,
    /// Placeholder shown inside the search input.
    placeholder: &'static str,
    /// Footer hint next to the `Esc` key.
    close: &'static str,
    /// Footer hint next to the arrow keys.
    navigate: &'static str,
    /// Footer hint next to the `Enter` key.
    open: &'static str,
    /// "No results" message containing the `{query}` placeholder.
    no_results: &'static str,
}
154
/// Translations for the locales bundled with the search widget.
///
/// Each row is `(lower-case locale code, strings)`. The `en` row must always
/// be present because [`SearchLabels::for_locale`] falls back to it.
///
/// The inline widget script matches the same set of codes as a leading URL
/// path segment to choose a per-locale `search-index.json`; keep that list in
/// step with this table when adding or removing a locale.
const LOCALE_TABLE: &[(&str, LocaleEntry)] = &[
    ("en", LocaleEntry { button: "Search",     placeholder: "Search documentation...",                close: "close",     navigate: "navigate",   open: "open",     no_results: "No results for \u{201c}{query}\u{201d}" }),
    ("fr", LocaleEntry { button: "Rechercher", placeholder: "Rechercher dans la documentation...",    close: "fermer",    navigate: "naviguer",   open: "ouvrir",   no_results: "Aucun r\u{e9}sultat pour \u{ab}\u{a0}{query}\u{a0}\u{bb}" }),
    ("ar", LocaleEntry { button: "بحث",        placeholder: "ابحث في الوثائق...",                      close: "إغلاق",     navigate: "تنقل",        open: "فتح",      no_results: "لا توجد نتائج لـ «{query}»" }),
    ("bn", LocaleEntry { button: "অনুসন্ধান",  placeholder: "ডকুমেন্টেশন অনুসন্ধান করুন...",          close: "বন্ধ",      navigate: "নেভিগেট",     open: "খুলুন",    no_results: "{query} এর জন্য কোনো ফলাফল নেই" }),
    ("cs", LocaleEntry { button: "Hledat",     placeholder: "Prohledat dokumentaci...",               close: "zav\u{159}\u{ed}t", navigate: "proch\u{e1}zet", open: "otev\u{159}\u{ed}t", no_results: "\u{17d}\u{e1}dn\u{e9} v\u{fd}sledky pro \u{201e}{query}\u{201c}" }),
    ("de", LocaleEntry { button: "Suchen",     placeholder: "Dokumentation durchsuchen...",           close: "schlie\u{df}en", navigate: "navigieren", open: "\u{f6}ffnen", no_results: "Keine Ergebnisse f\u{fc}r \u{201e}{query}\u{201c}" }),
    ("es", LocaleEntry { button: "Buscar",     placeholder: "Buscar en la documentaci\u{f3}n...",    close: "cerrar",    navigate: "navegar",    open: "abrir",    no_results: "Sin resultados para \u{ab}{query}\u{bb}" }),
    // NOTE(review): `open` spells a plain ASCII `k` as `\u{6b}`, which suggests
    // a different code point was intended (Hausa "open" is commonly written
    // `bu\u{257}e`) — confirm with a Hausa speaker before changing.
    ("ha", LocaleEntry { button: "Bincike",    placeholder: "Bincika takardun...",                    close: "rufe",      navigate: "kewaya",     open: "bu\u{6b}e", no_results: "Babu sakamako don \u{201c}{query}\u{201d}" }),
    ("he", LocaleEntry { button: "חיפוש",      placeholder: "חפש בתיעוד...",                          close: "סגור",       navigate: "נווט",        open: "פתח",      no_results: "אין תוצאות עבור «{query}»" }),
    ("hi", LocaleEntry { button: "खोजें",       placeholder: "दस्तावेज़ खोजें...",                      close: "बंद करें",   navigate: "नेविगेट",     open: "खोलें",    no_results: "{query} के लिए कोई परिणाम नहीं" }),
    ("id", LocaleEntry { button: "Cari",       placeholder: "Cari dokumentasi...",                    close: "tutup",     navigate: "navigasi",   open: "buka",     no_results: "Tidak ada hasil untuk \u{201c}{query}\u{201d}" }),
    ("it", LocaleEntry { button: "Cerca",      placeholder: "Cerca nella documentazione...",          close: "chiudi",    navigate: "naviga",     open: "apri",     no_results: "Nessun risultato per \u{ab}{query}\u{bb}" }),
    ("ja", LocaleEntry { button: "検索",        placeholder: "ドキュメントを検索...",                     close: "閉じる",    navigate: "移動",        open: "開く",     no_results: "「{query}」の結果はありません" }),
    ("ko", LocaleEntry { button: "검색",        placeholder: "문서 검색...",                              close: "닫기",       navigate: "탐색",        open: "열기",     no_results: "«{query}»에 대한 결과가 없습니다" }),
    ("nl", LocaleEntry { button: "Zoeken",     placeholder: "Documentatie doorzoeken...",             close: "sluiten",   navigate: "navigeren",  open: "openen",   no_results: "Geen resultaten voor \u{201c}{query}\u{201d}" }),
    ("pl", LocaleEntry { button: "Szukaj",     placeholder: "Przeszukaj dokumentacj\u{119}...",      close: "zamknij",   navigate: "nawiguj",    open: "otw\u{f3}rz", no_results: "Brak wynik\u{f3}w dla \u{201e}{query}\u{201d}" }),
    ("pt", LocaleEntry { button: "Pesquisar",  placeholder: "Pesquisar na documenta\u{e7}\u{e3}o...", close: "fechar",  navigate: "navegar",    open: "abrir",    no_results: "Sem resultados para \u{ab}{query}\u{bb}" }),
    ("ro", LocaleEntry { button: "Caut\u{103}", placeholder: "Caut\u{103} \u{ee}n documenta\u{21b}ie...", close: "\u{ee}nchide", navigate: "navigheaz\u{103}", open: "deschide", no_results: "Niciun rezultat pentru \u{201e}{query}\u{201d}" }),
    ("ru", LocaleEntry { button: "Поиск",      placeholder: "Поиск по документации...",               close: "закрыть",   navigate: "навигация",  open: "открыть",  no_results: "Нет результатов для «{query}»" }),
    ("sv", LocaleEntry { button: "S\u{f6}k",  placeholder: "S\u{f6}k i dokumentationen...",         close: "st\u{e4}ng", navigate: "navigera", open: "\u{f6}ppna", no_results: "Inga resultat f\u{f6}r \u{201d}{query}\u{201d}" }),
    ("th", LocaleEntry { button: "ค้นหา",       placeholder: "ค้นหาเอกสาร...",                          close: "ปิด",        navigate: "นำทาง",       open: "เปิด",      no_results: "ไม่พบผลลัพธ์สำหรับ \u{201c}{query}\u{201d}" }),
    ("tl", LocaleEntry { button: "Maghanap",   placeholder: "Maghanap sa dokumentasyon...",           close: "isara",     navigate: "mag-navigate", open: "buksan", no_results: "Walang resulta para sa \u{201c}{query}\u{201d}" }),
    ("tr", LocaleEntry { button: "Ara",        placeholder: "Belgelerde ara...",                      close: "kapat",     navigate: "gezin",      open: "a\u{e7}", no_results: "\u{201c}{query}\u{201d} i\u{e7}in sonu\u{e7} yok" }),
    ("uk", LocaleEntry { button: "Пошук",      placeholder: "Пошук у документації...",                close: "закрити",   navigate: "навігація",  open: "відкрити", no_results: "Немає результатів для «{query}»" }),
    ("vi", LocaleEntry { button: "T\u{ec}m ki\u{1ebf}m", placeholder: "T\u{ec}m trong t\u{e0}i li\u{1ec7}u...", close: "\u{111}\u{f3}ng", navigate: "\u{111}i\u{1ec1}u h\u{1b0}\u{1edb}ng", open: "m\u{1edf}", no_results: "Kh\u{f4}ng c\u{f3} k\u{1ebf}t qu\u{1ea3} cho \u{201c}{query}\u{201d}" }),
    ("yo", LocaleEntry { button: "Wáàwáà",     placeholder: "Ṣàwárí ìwé...",                           close: "pa",        navigate: "lọ kiri",    open: "ṣí",       no_results: "Kò sí àbájáde fún \u{201c}{query}\u{201d}" }),
    ("zh", LocaleEntry { button: "搜索",        placeholder: "搜索文档...",                              close: "关闭",       navigate: "导航",        open: "打开",     no_results: "「{query}」没有匹配结果" }),
    ("zh-tw", LocaleEntry { button: "搜尋",     placeholder: "搜尋文件...",                              close: "關閉",       navigate: "瀏覽",        open: "開啟",     no_results: "「{query}」找不到結果" }),
];
186
187impl SearchLabels {
188    /// English (default) labels.
189    #[must_use]
190    pub fn english() -> Self {
191        Self::for_locale("en")
192    }
193
194    /// French labels.
195    #[must_use]
196    pub fn french() -> Self {
197        Self::for_locale("fr")
198    }
199
200    /// Build labels for a known locale code (ISO 639-1, plus `zh-tw`).
201    ///
202    /// Lookup is case-insensitive. Falls back to English if the code is not
203    /// in the bundled table.
204    #[must_use]
205    pub fn for_locale(code: &str) -> Self {
206        let key = code.to_ascii_lowercase();
207        let entry = LOCALE_TABLE.iter().find(|(c, _)| *c == key).map_or_else(
208            || {
209                // `LOCALE_TABLE` is a hand-authored constant array that
210                // always contains the `en` entry; the `expect` is a
211                // type-system formality, not a runtime risk.
212                #[allow(clippy::expect_used)]
213                let en = LOCALE_TABLE
214                    .iter()
215                    .find(|(c, _)| *c == "en")
216                    .expect("en entry must exist in LOCALE_TABLE");
217                &en.1
218            },
219            |(_, e)| e,
220        );
221        Self {
222            button_text: entry.button.into(),
223            button_aria: entry.button.into(),
224            modal_aria: entry.button.into(),
225            input_placeholder: entry.placeholder.into(),
226            input_aria: entry.button.into(),
227            footer_close: entry.close.into(),
228            footer_navigate: entry.navigate.into(),
229            footer_open: entry.open.into(),
230            no_results: entry.no_results.into(),
231        }
232    }
233}
234
/// The default labels are the English ones.
impl Default for SearchLabels {
    /// Equivalent to [`SearchLabels::english`].
    fn default() -> Self {
        Self::english()
    }
}
240
/// Plugin that generates a search index and injects client-side search UI.
///
/// The unit form uses [`SearchLabels::english`] for the modal copy. To render
/// the widget in another language, construct a [`LocalizedSearchPlugin`].
///
/// The plugin does two things: it injects the widget markup into each HTML
/// page it is asked to transform, and after compilation it writes
/// `search-index.json` into the site directory.
///
/// # Example
///
/// ```rust
/// use ssg::plugin::PluginManager;
/// use ssg::search::SearchPlugin;
///
/// let mut pm = PluginManager::new();
/// pm.register(SearchPlugin);
/// ```
#[derive(Debug, Copy, Clone)]
pub struct SearchPlugin;
257
impl Plugin for SearchPlugin {
    /// Plugin identifier; [`LocalizedSearchPlugin`] reports the same name.
    fn name(&self) -> &'static str {
        "search"
    }

    /// Always `true`: this plugin rewrites HTML in `transform_html`.
    // NOTE(review): presumably the plugin manager checks this before calling
    // `transform_html` — confirm against the `Plugin` trait.
    fn has_transform(&self) -> bool {
        true
    }

    /// Injects the search widget with English labels into `html`.
    ///
    /// The page path and plugin context are not consulted. Pages that
    /// already contain the widget are returned unchanged.
    fn transform_html(
        &self,
        html: &str,
        _path: &Path,
        _ctx: &PluginContext,
    ) -> Result<String> {
        transform_search_html(html, &SearchLabels::english())
    }

    /// Builds the search index for `ctx.site_dir` and writes it to disk.
    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
        run_search_index(ctx)
    }
}
280
/// Variant of [`SearchPlugin`] that injects the widget with caller-supplied
/// localized [`SearchLabels`].
///
/// Index generation is identical to [`SearchPlugin`]; only the widget text
/// differs.
///
/// # Example
///
/// ```rust
/// use ssg::plugin::PluginManager;
/// use ssg::search::{LocalizedSearchPlugin, SearchLabels};
///
/// let mut pm = PluginManager::new();
/// pm.register(LocalizedSearchPlugin::new(SearchLabels::french()));
/// ```
#[derive(Debug, Clone)]
pub struct LocalizedSearchPlugin {
    /// Labels substituted into the widget template for every page.
    labels: SearchLabels,
}
297
impl LocalizedSearchPlugin {
    /// Create a new localized search plugin with the given labels.
    ///
    /// The labels are stored as given and used for every page this plugin
    /// transforms.
    #[must_use]
    pub const fn new(labels: SearchLabels) -> Self {
        Self { labels }
    }
}
305
impl Plugin for LocalizedSearchPlugin {
    /// Plugin identifier; the same name [`SearchPlugin`] reports.
    fn name(&self) -> &'static str {
        "search"
    }

    /// Always `true`: this plugin rewrites HTML in `transform_html`.
    // NOTE(review): presumably the plugin manager checks this before calling
    // `transform_html` — confirm against the `Plugin` trait.
    fn has_transform(&self) -> bool {
        true
    }

    /// Injects the search widget, rendered with this plugin's labels, into
    /// `html`.
    ///
    /// The page path and plugin context are not consulted. Pages that
    /// already contain the widget are returned unchanged.
    fn transform_html(
        &self,
        html: &str,
        _path: &Path,
        _ctx: &PluginContext,
    ) -> Result<String> {
        transform_search_html(html, &self.labels)
    }

    /// Builds the search index for `ctx.site_dir` and writes it to disk.
    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
        run_search_index(ctx)
    }
}
328
329/// Builds the search index and writes it to disk (`after_compile` phase).
330fn run_search_index(ctx: &PluginContext) -> Result<()> {
331    if !ctx.site_dir.exists() {
332        return Ok(());
333    }
334
335    let index = SearchIndex::build(&ctx.site_dir)?;
336    if index.is_empty() {
337        return Ok(());
338    }
339
340    index.write(&ctx.site_dir)?;
341
342    println!(
343        "[search] Indexed {} pages, search-index.json written",
344        index.len()
345    );
346    Ok(())
347}
348
349/// Injects the search widget into an HTML string (`transform_html` phase).
350fn transform_search_html(html: &str, labels: &SearchLabels) -> Result<String> {
351    if html.contains("ssg-search-widget") {
352        return Ok(html.to_string()); // Already injected
353    }
354
355    let script = build_widget_script(labels);
356
357    let injected = if let Some(pos) = html.rfind("</body>") {
358        format!("{}{}{}", &html[..pos], script, &html[pos..])
359    } else {
360        format!("{html}{script}")
361    };
362
363    Ok(injected)
364}
365
366// =====================================================================
367// HTML content extraction (lightweight, no external parser)
368// =====================================================================
369
370/// Extract the page title from `<title>` tag or first `<h1>`.
371fn extract_title(html: &str) -> String {
372    // Try <title>
373    if let Some(start) = html.find("<title>") {
374        let after = &html[start + 7..];
375        if let Some(end) = after.find("</title>") {
376            let title = &after[..end];
377            if !title.trim().is_empty() {
378                return strip_tags(title).trim().to_string();
379            }
380        }
381    }
382    // Fallback to first <h1>
383    if let Some(start) = html.find("<h1") {
384        let after = &html[start..];
385        if let Some(gt) = after.find('>') {
386            let content = &after[gt + 1..];
387            if let Some(end) = content.find("</h1>") {
388                return strip_tags(&content[..end]).trim().to_string();
389            }
390        }
391    }
392    String::new()
393}
394
395/// Extract all heading text (`<h1>` through `<h6>`).
396fn extract_headings(html: &str) -> Vec<String> {
397    let mut headings = Vec::new();
398    for tag in &["h1", "h2", "h3", "h4", "h5", "h6"] {
399        let open = format!("<{tag}");
400        let close = format!("</{tag}>");
401        let mut search_from = 0;
402
403        while let Some(start) = html[search_from..].find(&open) {
404            let abs_start = search_from + start;
405            let after = &html[abs_start..];
406            if let Some(gt) = after.find('>') {
407                let content = &after[gt + 1..];
408                if let Some(end) = content.find(&close) {
409                    let text = strip_tags(&content[..end]).trim().to_string();
410                    if !text.is_empty() {
411                        headings.push(text);
412                    }
413                    search_from = abs_start + gt + 1 + end + close.len();
414                } else {
415                    break;
416                }
417            } else {
418                break;
419            }
420        }
421    }
422    headings
423}
424
425/// Extract visible text from HTML, stripping all tags.
426fn extract_text(html: &str) -> String {
427    // Remove non-content blocks. Note: <header> is intentionally kept
428    // so hero taglines / subtitles are searchable.
429    let mut clean = html.to_string();
430    for tag in &["script", "style", "nav", "footer", "head"] {
431        let open = format!("<{tag}");
432        let close = format!("</{tag}>");
433        while let Some(start) = clean.find(&open) {
434            if let Some(end) = clean[start..].find(&close) {
435                clean.replace_range(start..start + end + close.len(), " ");
436            } else {
437                break;
438            }
439        }
440    }
441    strip_tags(&clean)
442}
443
/// Drop everything between `<` and `>` and normalise whitespace.
///
/// Every `>` contributes a space so that words separated only by markup do
/// not run together. Runs of whitespace then collapse to a single space and
/// the result carries no leading or trailing whitespace. A `<` with no
/// matching `>` swallows the rest of the input.
fn strip_tags(html: &str) -> String {
    let mut visible = String::with_capacity(html.len());
    let mut inside_tag = false;

    for c in html.chars() {
        if c == '<' {
            inside_tag = true;
        } else if c == '>' {
            inside_tag = false;
            visible.push(' ');
        } else if !inside_tag {
            visible.push(c);
        }
    }

    // Splitting on whitespace and re-joining collapses runs and trims both
    // ends in one step.
    visible.split_whitespace().collect::<Vec<_>>().join(" ")
}
475
/// Shorten `s` to at most `max` characters, preferring a word boundary.
///
/// Strings of `max` characters or fewer are returned unchanged. Longer ones
/// are cut after `max` characters and then trimmed back to just before the
/// last space in that prefix; when the prefix has no space it is returned
/// as cut. Lengths are counted in characters, so multi-byte text is never
/// split inside a code point.
fn truncate(s: &str, max: usize) -> String {
    // Byte offset of the first character that does not fit, if there is one.
    let cut = match s.char_indices().nth(max) {
        Some((offset, _)) => offset,
        None => return s.to_owned(),
    };

    let head = &s[..cut];
    let kept = head.rfind(' ').map_or(head, |space| &head[..space]);
    kept.to_owned()
}
493
/// Collect all `.html` files under `dir` (delegates to `crate::walk`).
///
/// Passes the extension `"html"` and [`MAX_INDEX_ENTRIES`] as the bound to
/// `walk_files_bounded_count`; any error from the walk is returned as-is.
// NOTE(review): presumably the walk is recursive and the bound caps the
// number of files returned — confirm against `crate::walk`.
fn collect_html_files(dir: &Path) -> Result<Vec<PathBuf>> {
    crate::walk::walk_files_bounded_count(dir, "html", MAX_INDEX_ENTRIES)
}
498
/// Test-only helper: insert `script` into the HTML file at `path`, in place.
///
/// The markup goes immediately before the last `</body>`, or at the end of
/// the file when there is none. A file that already mentions
/// `ssg-search-widget` is left untouched.
///
/// # Errors
///
/// Fails if the file cannot be read or written.
#[cfg(test)]
fn inject_search_ui(path: &Path, script: &str) -> Result<()> {
    let page = fs::read_to_string(path)
        .with_context(|| format!("cannot read {}", path.display()))?;

    if page.contains("ssg-search-widget") {
        // Widget is already present; nothing to do.
        return Ok(());
    }

    // No closing body tag: split at the very end so the tail is empty.
    let split_at = page.rfind("</body>").unwrap_or(page.len());
    let (before, after) = page.split_at(split_at);
    let rewritten = [before, script, after].concat();

    fs::write(path, rewritten)
        .with_context(|| format!("cannot write {}", path.display()))
}
526
527/// Render [`SEARCH_WIDGET_SCRIPT`] (a template) with the given labels.
528///
529/// HTML attribute / text values are HTML-escaped; the `no_results` string is
530/// also JS-escaped because it ends up inside a single-quoted JS string literal.
531fn build_widget_script(labels: &SearchLabels) -> String {
532    let no_results_with_expr = html_escape(&labels.no_results)
533        .replace("{query}", "&ldquo;\'+esc(q)+\'&rdquo;");
534
535    SEARCH_WIDGET_SCRIPT
536        .replace("{{SSG_BTN_ARIA}}", &html_escape(&labels.button_aria))
537        .replace("{{SSG_BTN_TEXT}}", &html_escape(&labels.button_text))
538        .replace("{{SSG_MODAL_ARIA}}", &html_escape(&labels.modal_aria))
539        .replace(
540            "{{SSG_INPUT_PLACEHOLDER}}",
541            &html_escape(&labels.input_placeholder),
542        )
543        .replace("{{SSG_INPUT_ARIA}}", &html_escape(&labels.input_aria))
544        .replace("{{SSG_FOOTER_CLOSE}}", &html_escape(&labels.footer_close))
545        .replace(
546            "{{SSG_FOOTER_NAVIGATE}}",
547            &html_escape(&labels.footer_navigate),
548        )
549        .replace("{{SSG_FOOTER_OPEN}}", &html_escape(&labels.footer_open))
550        .replace("{{SSG_NO_RESULTS}}", &js_escape(&no_results_with_expr))
551}
552
/// Replace the five characters that are significant in HTML text and
/// attribute values (`&`, `<`, `>`, `"`, `'`) with character references.
///
/// Every other character is copied through unchanged.
fn html_escape(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, c| {
            match c {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '"' => escaped.push_str("&quot;"),
                '\'' => escaped.push_str("&#39;"),
                other => escaped.push(other),
            }
            escaped
        })
}
569
/// Make `s` safe to place between single quotes in JavaScript source.
///
/// Backslashes and single quotes are backslash-escaped, and line feeds and
/// carriage returns become `\n` / `\r`. All other characters pass through.
fn js_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        let replacement = match c {
            '\\' => Some("\\\\"),
            '\'' => Some("\\'"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            _ => None,
        };
        match replacement {
            Some(sequence) => escaped.push_str(sequence),
            None => escaped.push(c),
        }
    }
    escaped
}
584
585/// The self-contained search widget (HTML + CSS + JS).
586///
587/// Includes a fixed search button in the top-right corner (like pacs008.com's
588/// `DocSearch` bar) that opens a full-screen search modal. Also responds to
589/// `Ctrl+K` / `Cmd+K`.
590const SEARCH_WIDGET_SCRIPT: &str = r#"
591<!-- SSG Search Widget -->
592<div id="ssg-search-widget">
593<style>
594/* ── Trigger button (always visible) ── */
595#ssg-search-btn{position:fixed;top:16px;right:16px;z-index:9998;display:flex;align-items:center;gap:8px;padding:8px 16px;background:#fff;border:1px solid #d1d5db;border-radius:8px;cursor:pointer;font-family:-apple-system,system-ui,sans-serif;font-size:14px;color:#595960;box-shadow:0 1px 3px rgba(0,0,0,.08);transition:border-color .15s,box-shadow .15s}
596#ssg-search-btn:hover{border-color:#595960;box-shadow:0 2px 6px rgba(0,0,0,.12)}
597#ssg-search-btn svg{width:16px;height:16px;stroke:currentColor;fill:none;stroke-width:2;stroke-linecap:round;stroke-linejoin:round}
598#ssg-search-btn kbd{font-family:inherit;font-size:11px;padding:2px 6px;background:#f3f4f6;border:1px solid #e5e7eb;border-radius:4px;color:#595960;margin-left:4px}
599/* ── Modal overlay ── */
600#ssg-search-overlay{display:none;position:fixed;inset:0;z-index:9999;background:rgba(0,0,0,.5);align-items:flex-start;justify-content:center;padding-top:12vh}
601#ssg-search-overlay.active{display:flex}
602#ssg-search-box{background:#fff;border-radius:12px;width:92%;max-width:640px;box-shadow:0 25px 60px rgba(0,0,0,.3);overflow:hidden;font-family:-apple-system,system-ui,sans-serif}
603#ssg-search-header{display:flex;align-items:center;padding:0 16px;border-bottom:1px solid #e5e7eb}
604#ssg-search-header svg{width:20px;height:20px;stroke:#9ca3af;fill:none;stroke-width:2;flex-shrink:0}
605#ssg-search-input{flex:1;padding:16px 12px;font-size:16px;border:none;outline:none;background:transparent}
606#ssg-search-results{max-height:50vh;overflow-y:auto}
607#ssg-sr-status{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);border:0}
608.ssg-result{display:block;padding:12px 20px;text-decoration:none;color:#111;border-bottom:1px solid #f3f4f6;transition:background .1s}
609.ssg-result:hover,.ssg-result.active{background:#ecfdf5}
610.ssg-result-title{font-weight:600;font-size:15px;margin-bottom:3px}
611.ssg-result-snippet{font-size:13px;color:#595960;line-height:1.5}
612.ssg-result-snippet mark{background:#fef08a;color:inherit;border-radius:2px;padding:0 2px}
613.ssg-no-results{padding:32px 20px;text-align:center;color:#595960;font-size:14px}
614.ssg-no-results[role="status"]{}
615/* Forced-colours / Windows High Contrast Mode */
616@media(forced-colors:active){
617#ssg-search-btn{border:1px solid ButtonText}
618#ssg-search-btn:focus{outline:2px solid Highlight}
619#ssg-search-input{border:1px solid CanvasText}
620#ssg-search-input:focus{outline:2px solid Highlight}
621.ssg-result:focus,.ssg-result.active{outline:2px solid Highlight}
622.ssg-result-snippet mark{background:Highlight;color:HighlightText}
623}
624.ssg-search-footer{display:flex;gap:16px;padding:10px 20px;font-size:12px;color:#595960;border-top:1px solid #e5e7eb;justify-content:flex-end}
625.ssg-search-footer kbd{font-family:inherit;font-size:11px;padding:1px 5px;background:#f3f4f6;border:1px solid #e5e7eb;border-radius:3px}
626/* ── Dark mode (media query + data-theme attribute) ── */
627@media(prefers-color-scheme:dark){
628:root:not([data-theme="light"]) #ssg-search-btn{background:#1f2937;border-color:#374151;color:#cccccf}
629:root:not([data-theme="light"]) #ssg-search-btn:hover{border-color:#4b5563}
630:root:not([data-theme="light"]) #ssg-search-btn kbd{background:#374151;border-color:#4b5563;color:#cccccf}
631:root:not([data-theme="light"]) #ssg-search-box{background:#1f2937;color:#f9fafb}
632:root:not([data-theme="light"]) #ssg-search-header{border-color:#374151}
633:root:not([data-theme="light"]) #ssg-search-input{color:#f9fafb}
634:root:not([data-theme="light"]) .ssg-result{color:#f9fafb;border-color:#374151}
635:root:not([data-theme="light"]) .ssg-result:hover,:root:not([data-theme="light"]) .ssg-result.active{background:#374151}
636:root:not([data-theme="light"]) .ssg-result-snippet{color:#cccccf}
637:root:not([data-theme="light"]) .ssg-result-snippet mark{background:#854d0e;color:#fef08a}
638:root:not([data-theme="light"]) .ssg-no-results{color:#cccccf}
639:root:not([data-theme="light"]) .ssg-search-footer{border-color:#374151;color:#cccccf}
640:root:not([data-theme="light"]) .ssg-search-footer kbd{background:#374151;border-color:#4b5563}
641}
642[data-theme="dark"] #ssg-search-btn{background:#1f2937;border-color:#374151;color:#cccccf}
643[data-theme="dark"] #ssg-search-btn:hover{border-color:#4b5563}
644[data-theme="dark"] #ssg-search-btn kbd{background:#374151;border-color:#4b5563;color:#cccccf}
645[data-theme="dark"] #ssg-search-box{background:#1f2937;color:#f9fafb}
646[data-theme="dark"] #ssg-search-header{border-color:#374151}
647[data-theme="dark"] #ssg-search-input{color:#f9fafb}
648[data-theme="dark"] .ssg-result{color:#f9fafb;border-color:#374151}
649[data-theme="dark"] .ssg-result:hover,[data-theme="dark"] .ssg-result.active{background:#374151}
650[data-theme="dark"] .ssg-result-snippet{color:#cccccf}
651[data-theme="dark"] .ssg-result-snippet mark{background:#854d0e;color:#fef08a}
652[data-theme="dark"] .ssg-no-results{color:#cccccf}
653[data-theme="dark"] .ssg-search-footer{border-color:#374151;color:#cccccf}
654[data-theme="dark"] .ssg-search-footer kbd{background:#374151;border-color:#4b5563}
655</style>
656<!-- Search trigger button -->
657<button id="ssg-search-btn" type="button" aria-label="{{SSG_BTN_ARIA}}">
658<svg viewBox="0 0 24 24"><circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/></svg>
659<span>{{SSG_BTN_TEXT}}</span>
660<kbd>K</kbd>
661</button>
662<!-- Search modal -->
663<div id="ssg-search-overlay" role="dialog" aria-label="{{SSG_MODAL_ARIA}}">
664<div id="ssg-search-box">
665<div id="ssg-search-header">
666<svg viewBox="0 0 24 24"><circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/></svg>
667<input id="ssg-search-input" type="search" placeholder="{{SSG_INPUT_PLACEHOLDER}}" autocomplete="off" aria-label="{{SSG_INPUT_ARIA}}"/>
668</div>
669<div id="ssg-search-results" aria-live="polite"></div>
670<div id="ssg-sr-status" role="status" aria-live="polite" aria-atomic="true"></div>
671<div class="ssg-search-footer"><span><kbd>Esc</kbd> {{SSG_FOOTER_CLOSE}}</span><span><kbd>&uarr;</kbd><kbd>&darr;</kbd> {{SSG_FOOTER_NAVIGATE}}</span><span><kbd>Enter</kbd> {{SSG_FOOTER_OPEN}}</span></div>
672</div>
673</div>
674<script>
675(function(){
676var idx=null,overlay=document.getElementById('ssg-search-overlay'),
677input=document.getElementById('ssg-search-input'),
678results=document.getElementById('ssg-search-results'),
679btn=document.getElementById('ssg-search-btn'),active=-1,
680lm=location.pathname.match(/^\/(en|fr|ar|bn|cs|de|es|ha|he|hi|id|it|ja|ko|nl|pl|pt|ro|ru|sv|th|tl|tr|uk|vi|yo|zh-tw|zh)\//),
681lp=lm?'/'+lm[1]:'';
682function load(){if(idx)return Promise.resolve();var sp=lm?'/'+lm[1]+'/search-index.json':'/search-index.json';return fetch(sp).then(function(r){return r.json()}).then(function(d){idx=d.entries||[]}).catch(function(){idx=[]})}
683function open(){load().then(function(){overlay.classList.add('active');input.value='';results.innerHTML='';input.focus();active=-1})}
684function close(){overlay.classList.remove('active');active=-1}
685function highlight(text,q){if(!q)return esc(text);var re=new RegExp('('+q.replace(/[.*+?^${}()|[\]\\]/g,'\\$&')+')','gi');return esc(text).replace(re,'<mark>$1</mark>')}
686function esc(s){var d=document.createElement('div');d.textContent=s;return d.innerHTML}
687function snippet(content,q,len){len=len||150;if(!q)return esc(content.substring(0,len));var i=content.toLowerCase().indexOf(q.toLowerCase());if(i<0)return esc(content.substring(0,len));var s=Math.max(0,i-50),e=Math.min(content.length,i+len);var t=(s>0?'...':'')+content.substring(s,e)+(e<content.length?'...':'');return highlight(t,q)}
688function search(q){if(!idx||!q){results.innerHTML='';return}q=q.trim();if(!q){results.innerHTML='';return}var ql=q.toLowerCase(),hits=[];
689for(var i=0;i<idx.length&&hits.length<20;i++){var e=idx[i],s=0;if(e.title.toLowerCase().indexOf(ql)>=0)s+=10;if(e.content.toLowerCase().indexOf(ql)>=0)s+=5;for(var h=0;h<e.headings.length;h++){if(e.headings[h].toLowerCase().indexOf(ql)>=0){s+=3;break}}if(s>0)hits.push({entry:e,score:s})}
690hits.sort(function(a,b){return b.score-a.score});
691var sr=document.getElementById('ssg-sr-status');
692if(!hits.length){results.innerHTML='<div class="ssg-no-results" role="status">{{SSG_NO_RESULTS}}</div>';if(sr)sr.textContent='No results found';return}
693var html='';for(var j=0;j<hits.length;j++){var e=hits[j].entry;html+='<a class="ssg-result" href="'+esc(lp+e.url)+'">'+'<div class="ssg-result-title">'+highlight(e.title,q)+'</div>'+'<div class="ssg-result-snippet">'+snippet(e.content,q)+'</div></a>'}
694results.innerHTML=html;active=-1;if(sr)sr.textContent=hits.length+' result'+(hits.length===1?'':'s')+' found'}
695function nav(dir){var items=results.querySelectorAll('.ssg-result');if(!items.length)return;if(active>=0&&items[active])items[active].classList.remove('active');active+=dir;if(active<0)active=items.length-1;if(active>=items.length)active=0;items[active].classList.add('active');items[active].scrollIntoView({block:'nearest'})}
696btn.addEventListener('click',function(){open()});
697input.addEventListener('input',function(){search(this.value)});
698overlay.addEventListener('click',function(e){if(e.target===overlay)close()});
699document.addEventListener('keydown',function(e){if((e.ctrlKey||e.metaKey)&&e.key==='k'){e.preventDefault();if(overlay.classList.contains('active'))close();else open()}
700if(!overlay.classList.contains('active'))return;if(e.key==='Escape')close();if(e.key==='ArrowDown'){e.preventDefault();nav(1)}if(e.key==='ArrowUp'){e.preventDefault();nav(-1)}
701if(e.key==='Enter'){e.preventDefault();var items=results.querySelectorAll('.ssg-result');if(active>=0&&items[active])window.location=items[active].href;else if(items[0])window.location=items[0].href}})
702})();
703</script>
704</div>
705"#;
706
707#[cfg(test)]
708#[allow(clippy::unwrap_used, clippy::expect_used)]
709mod tests {
710    use super::*;
711    use tempfile::tempdir;
712
713    fn make_html(title: &str, body: &str) -> String {
714        format!(
715            "<html><head><title>{title}</title></head>\
716             <body><h1>{title}</h1>{body}</body></html>"
717        )
718    }
719
720    #[test]
721    fn extract_title_from_title_tag() {
722        let html =
723            "<html><head><title>My Page</title></head><body></body></html>";
724        assert_eq!(extract_title(html), "My Page");
725    }
726
727    #[test]
728    fn extract_title_from_h1() {
729        let html = "<html><body><h1>Heading</h1></body></html>";
730        assert_eq!(extract_title(html), "Heading");
731    }
732
733    #[test]
734    fn extract_title_empty() {
735        assert_eq!(extract_title("<html><body></body></html>"), "");
736    }
737
738    #[test]
739    fn extract_headings_multiple() {
740        let html = "<h1>Title</h1><h2>Intro</h2><h3>Detail</h3>";
741        let h = extract_headings(html);
742        assert_eq!(h, vec!["Title", "Intro", "Detail"]);
743    }
744
745    #[test]
746    fn extract_headings_with_attributes() {
747        let html = r#"<h2 class="section" id="s1">Section One</h2>"#;
748        let h = extract_headings(html);
749        assert_eq!(h, vec!["Section One"]);
750    }
751
752    #[test]
753    fn extract_text_strips_tags() {
754        let html = "<p>Hello <strong>world</strong></p>";
755        let text = extract_text(html);
756        assert_eq!(text, "Hello world");
757    }
758
759    #[test]
760    fn extract_text_removes_scripts() {
761        let html = "<body><script>alert(1)</script><p>Visible</p></body>";
762        let text = extract_text(html);
763        assert!(text.contains("Visible"));
764        assert!(!text.contains("alert"));
765    }
766
767    #[test]
768    fn strip_tags_collapses_whitespace() {
769        let result = strip_tags("<p>  hello   <br>  world  </p>");
770        assert_eq!(result, "hello world");
771    }
772
773    #[test]
774    fn truncate_short_string() {
775        assert_eq!(truncate("short", 100), "short");
776    }
777
778    #[test]
779    fn truncate_at_word_boundary() {
780        let result = truncate("hello beautiful world", 18);
781        assert_eq!(result, "hello beautiful");
782    }
783
784    #[test]
785    fn search_index_build_from_directory() -> Result<()> {
786        let tmp = tempdir()?;
787        fs::write(
788            tmp.path().join("index.html"),
789            make_html("Home", "<p>Welcome to SSG</p>"),
790        )?;
791        fs::write(
792            tmp.path().join("about.html"),
793            make_html("About", "<p>About this site</p>"),
794        )?;
795
796        let index = SearchIndex::build(tmp.path())?;
797        assert_eq!(index.len(), 2);
798        assert!(!index.is_empty());
799
800        let titles: Vec<&str> =
801            index.entries.iter().map(|e| e.title.as_str()).collect();
802        assert!(titles.contains(&"Home"));
803        assert!(titles.contains(&"About"));
804        Ok(())
805    }
806
807    #[test]
808    fn search_index_write_creates_json() -> Result<()> {
809        let tmp = tempdir()?;
810        let index = SearchIndex {
811            entries: vec![SearchEntry {
812                title: "Test".into(),
813                url: "/test.html".into(),
814                content: "Test content".into(),
815                headings: vec!["Heading".into()],
816            }],
817        };
818        index.write(tmp.path())?;
819
820        let path = tmp.path().join("search-index.json");
821        assert!(path.exists());
822        let json: SearchIndex =
823            serde_json::from_str(&fs::read_to_string(&path)?)?;
824        assert_eq!(json.entries.len(), 1);
825        assert_eq!(json.entries[0].title, "Test");
826        Ok(())
827    }
828
829    #[test]
830    fn search_index_empty_directory() -> Result<()> {
831        let tmp = tempdir()?;
832        let index = SearchIndex::build(tmp.path())?;
833        assert!(index.is_empty());
834        Ok(())
835    }
836
837    #[test]
838    fn search_index_ignores_non_html() -> Result<()> {
839        let tmp = tempdir()?;
840        fs::write(tmp.path().join("style.css"), "body{}")?;
841        fs::write(tmp.path().join("data.json"), "{}")?;
842        let index = SearchIndex::build(tmp.path())?;
843        assert!(index.is_empty());
844        Ok(())
845    }
846
847    #[test]
848    fn search_index_nested_directories() -> Result<()> {
849        let tmp = tempdir()?;
850        fs::create_dir_all(tmp.path().join("blog"))?;
851        fs::write(tmp.path().join("index.html"), make_html("Home", ""))?;
852        fs::write(
853            tmp.path().join("blog/post.html"),
854            make_html("Post", "<p>Blog content</p>"),
855        )?;
856
857        let index = SearchIndex::build(tmp.path())?;
858        assert_eq!(index.len(), 2);
859        let urls: Vec<&str> =
860            index.entries.iter().map(|e| e.url.as_str()).collect();
861        assert!(urls.iter().any(|u| u.contains("blog")));
862        Ok(())
863    }
864
865    #[test]
866    fn search_entry_content_truncated() -> Result<()> {
867        let tmp = tempdir()?;
868        let long_text = "word ".repeat(2000); // 10,000 chars
869        fs::write(
870            tmp.path().join("long.html"),
871            make_html("Long", &format!("<p>{long_text}</p>")),
872        )?;
873
874        let index = SearchIndex::build(tmp.path())?;
875        assert!(index.entries[0].content.len() <= MAX_CONTENT_LENGTH);
876        Ok(())
877    }
878
879    #[test]
880    fn inject_search_ui_adds_widget() -> Result<()> {
881        let tmp = tempdir()?;
882        let path = tmp.path().join("page.html");
883        fs::write(&path, "<html><body><p>Hello</p></body></html>")?;
884
885        let script = build_widget_script(&SearchLabels::english());
886        inject_search_ui(&path, &script)?;
887
888        let result = fs::read_to_string(&path)?;
889        assert!(result.contains("ssg-search-widget"));
890        assert!(result.contains("search-index.json"));
891        assert!(result.contains("ctrlKey"));
892        Ok(())
893    }
894
895    #[test]
896    fn inject_search_ui_idempotent() -> Result<()> {
897        let tmp = tempdir()?;
898        let path = tmp.path().join("page.html");
899        fs::write(&path, "<html><body><p>Hi</p></body></html>")?;
900
901        let script = build_widget_script(&SearchLabels::english());
902        inject_search_ui(&path, &script)?;
903        let first = fs::read_to_string(&path)?;
904
905        inject_search_ui(&path, &script)?;
906        let second = fs::read_to_string(&path)?;
907
908        assert_eq!(first, second); // No double injection
909        Ok(())
910    }
911
912    #[test]
913    fn search_plugin_name() {
914        assert_eq!(SearchPlugin.name(), "search");
915    }
916
917    #[test]
918    fn search_plugin_full_pipeline() -> Result<()> {
919        let tmp = tempdir()?;
920        let html_content = make_html("Home", "<p>Welcome</p>");
921        fs::write(tmp.path().join("index.html"), &html_content)?;
922        fs::write(
923            tmp.path().join("about.html"),
924            make_html("About", "<p>About us</p>"),
925        )?;
926
927        let ctx = PluginContext::new(
928            Path::new("content"),
929            Path::new("build"),
930            tmp.path(),
931            Path::new("templates"),
932        );
933        SearchPlugin.after_compile(&ctx)?;
934
935        // Index was written
936        assert!(tmp.path().join("search-index.json").exists());
937
938        // Widget was injected via transform_html
939        let output = SearchPlugin.transform_html(
940            &html_content,
941            &tmp.path().join("index.html"),
942            &ctx,
943        )?;
944        assert!(output.contains("ssg-search-widget"));
945        Ok(())
946    }
947
948    #[test]
949    fn search_plugin_nonexistent_dir() -> Result<()> {
950        let ctx = PluginContext::new(
951            Path::new("c"),
952            Path::new("b"),
953            Path::new("/nonexistent"),
954            Path::new("t"),
955        );
956        SearchPlugin.after_compile(&ctx)?; // Should not error
957        Ok(())
958    }
959
960    #[test]
961    fn search_plugin_registers() {
962        use crate::plugin::PluginManager;
963        let mut pm = PluginManager::new();
964        pm.register(SearchPlugin);
965        assert_eq!(pm.names(), vec!["search"]);
966    }
967
968    #[test]
969    fn search_entry_serialize_deserialize() -> Result<()> {
970        let entry = SearchEntry {
971            title: "Test".into(),
972            url: "/test.html".into(),
973            content: "Content".into(),
974            headings: vec!["H1".into()],
975        };
976        let json = serde_json::to_string(&entry)?;
977        let parsed: SearchEntry = serde_json::from_str(&json)?;
978        assert_eq!(entry, parsed);
979        Ok(())
980    }
981
982    // -------------------------------------------------------------------
983    // Targeted edge-case coverage
984    // -------------------------------------------------------------------
985
986    #[test]
987    fn search_plugin_after_compile_empty_index_short_circuits() -> Result<()> {
988        // Line 136: `if index.is_empty() { return Ok(()) }`. Need a
989        // site with HTML files that produce zero entries — easiest:
990        // a site with only a stylesheet (collect_html_files returns
991        // empty, build returns empty index).
992        let tmp = tempdir()?;
993        fs::write(tmp.path().join("style.css"), "body{}")?;
994        let ctx = PluginContext::new(
995            Path::new("content"),
996            Path::new("build"),
997            tmp.path(),
998            Path::new("templates"),
999        );
1000        SearchPlugin.after_compile(&ctx)?;
1001        // No search-index.json should have been written.
1002        assert!(!tmp.path().join("search-index.json").exists());
1003        Ok(())
1004    }
1005
1006    #[test]
1007    fn extract_title_empty_title_falls_back_to_h1() {
1008        // Line 167 false branch: title trimmed is empty, so we fall
1009        // through to the h1 fallback at lines 172-180.
1010        let html = "<html><head><title>   </title></head><body><h1>Heading One</h1></body></html>";
1011        assert_eq!(extract_title(html), "Heading One");
1012    }
1013
1014    #[test]
1015    fn extract_title_no_title_tag_falls_back_to_h1() {
1016        // Lines 178-179: the h1 fallback Some-Some success path.
1017        let html = "<html><body><h1>From H1</h1></body></html>";
1018        assert_eq!(extract_title(html), "From H1");
1019    }
1020
1021    #[test]
1022    fn extract_title_h1_with_attributes_works() {
1023        // Verifies the `find('>')` step at line 174 handles attrs.
1024        let html = r#"<html><body><h1 class="title">Attrs</h1></body></html>"#;
1025        assert_eq!(extract_title(html), "Attrs");
1026    }
1027
1028    #[test]
1029    fn extract_title_no_title_no_h1_returns_empty() {
1030        let html = "<html><body><p>just a paragraph</p></body></html>";
1031        assert_eq!(extract_title(html), "");
1032    }
1033
1034    #[test]
1035    fn extract_title_unterminated_title_falls_back_to_h1() {
1036        // <title> open without close — `find("</title>")` returns
1037        // None, the outer `if let` body exits, and the function
1038        // proceeds to the <h1> fallback.
1039        let html =
1040            "<html><head><title>Open<body><h1>Fallback</h1></body></html>";
1041        let result = extract_title(html);
1042        assert_eq!(result, "Fallback");
1043    }
1044
1045    #[test]
1046    fn extract_title_unterminated_h1_returns_empty() {
1047        // <h1> open without `>` and without `</h1>` — both inner
1048        // `if let`s return None, function returns "".
1049        let html = "<html><body><h1 attr=\"open";
1050        assert_eq!(extract_title(html), "");
1051    }
1052
1053    #[test]
1054    fn extract_headings_unterminated_h_tag_breaks_inner_loop() {
1055        // Line 204: the `break` when no `</hN>` close tag is found.
1056        let html = "<html><body><h1>Has close</h1><h2>no close tag";
1057        let headings = extract_headings(html);
1058        // The first heading is captured; the unterminated one
1059        // breaks out of the inner loop without panicking.
1060        assert!(headings.contains(&"Has close".to_string()));
1061    }
1062
1063    #[test]
1064    fn extract_headings_unterminated_open_tag_breaks_outer() {
1065        // Line 207: the `break` when `<h1` has no `>`. Build a
1066        // pathological string that contains `<h1` but never `>`
1067        // afterwards.
1068        let html = "<h1 attr=\"unterminated";
1069        let headings = extract_headings(html);
1070        assert!(headings.is_empty());
1071    }
1072
1073    #[test]
1074    fn extract_text_unterminated_strip_tag_breaks() {
1075        // Line 225: the `break` in the strip loop when a tag opener
1076        // exists but no matching close. extract_text strips
1077        // <script>/<style>/etc. blocks; an unterminated <script>
1078        // hits the inner break.
1079        let html = "<html><body><script>unterminated<p>visible</p>";
1080        let _ = extract_text(html);
1081    }
1082
1083    #[test]
1084    fn truncate_no_space_falls_back_to_byte_cut() {
1085        // Line 278: `else { truncated.to_string() }` when there is
1086        // no space within the first `max` characters.
1087        let result = truncate("oneverylongwordwithnospacesatall", 10);
1088        // Returns the byte-truncated string (no space to break on).
1089        assert_eq!(result, "oneverylon");
1090    }
1091
1092    #[test]
1093    fn truncate_short_string_returned_unchanged() {
1094        // Line 266 true branch: input shorter than max returns as-is.
1095        assert_eq!(truncate("short", 100), "short");
1096    }
1097
1098    #[test]
1099    fn collect_html_files_respects_bound() -> Result<()> {
1100        let tmp = tempdir()?;
1101        for i in 0..50 {
1102            fs::write(tmp.path().join(format!("p{i}.html")), "<html></html>")?;
1103        }
1104        let files = collect_html_files(tmp.path())?;
1105        assert_eq!(files.len(), 50);
1106        Ok(())
1107    }
1108
1109    #[test]
1110    fn search_index_empty_site_dir() -> Result<()> {
1111        // Arrange
1112        let tmp = tempdir()?;
1113
1114        // Act
1115        let index = SearchIndex::build(tmp.path())?;
1116
1117        // Assert
1118        assert!(index.is_empty());
1119        assert_eq!(index.len(), 0);
1120        Ok(())
1121    }
1122
1123    #[test]
1124    fn search_index_max_content_length_truncation() -> Result<()> {
1125        // Arrange
1126        let tmp = tempdir()?;
1127        let long_content = "a ".repeat(MAX_CONTENT_LENGTH + 1000);
1128        fs::write(
1129            tmp.path().join("long.html"),
1130            make_html("Long Page", &format!("<p>{long_content}</p>")),
1131        )?;
1132
1133        // Act
1134        let index = SearchIndex::build(tmp.path())?;
1135
1136        // Assert
1137        assert_eq!(index.len(), 1);
1138        assert!(
1139            index.entries[0].content.chars().count() <= MAX_CONTENT_LENGTH,
1140            "content should be truncated to at most MAX_CONTENT_LENGTH characters"
1141        );
1142        Ok(())
1143    }
1144
1145    #[test]
1146    fn search_index_unicode_content() -> Result<()> {
1147        // Arrange
1148        let tmp = tempdir()?;
1149        let unicode_body = "<p>Héllo wörld! 日本語テスト 🦀🔍 Ñoño café</p>";
1150        fs::write(
1151            tmp.path().join("unicode.html"),
1152            make_html("Ünïcödé Pagé 🎉", unicode_body),
1153        )?;
1154
1155        // Act
1156        let index = SearchIndex::build(tmp.path())?;
1157
1158        // Assert
1159        assert_eq!(index.len(), 1);
1160        let entry = &index.entries[0];
1161        assert_eq!(entry.title, "Ünïcödé Pagé 🎉");
1162        assert!(entry.content.contains("日本語テスト"));
1163        assert!(entry.content.contains("🦀🔍"));
1164        assert!(entry.content.contains("café"));
1165        Ok(())
1166    }
1167
1168    #[test]
1169    fn search_plugin_nonexistent_dir_returns_ok() -> Result<()> {
1170        // Arrange
1171        let ctx = PluginContext::new(
1172            Path::new("content"),
1173            Path::new("build"),
1174            Path::new("/tmp/nonexistent_search_test_dir_xyz"),
1175            Path::new("templates"),
1176        );
1177
1178        // Act
1179        let result = SearchPlugin.after_compile(&ctx);
1180
1181        // Assert
1182        assert!(result.is_ok());
1183        Ok(())
1184    }
1185
1186    #[test]
1187    fn inject_search_ui_no_body_tag() -> Result<()> {
1188        // Arrange
1189        let tmp = tempdir()?;
1190        let path = tmp.path().join("fragment.html");
1191        fs::write(&path, "<html><p>No body tag here</p></html>")?;
1192
1193        // Act
1194        let script = build_widget_script(&SearchLabels::english());
1195        inject_search_ui(&path, &script)?;
1196
1197        // Assert
1198        let result = fs::read_to_string(&path)?;
1199        assert!(
1200            result.contains("ssg-search-widget"),
1201            "widget should be appended even without </body>"
1202        );
1203        assert!(result.contains("<html><p>No body tag here</p></html>"));
1204        Ok(())
1205    }
1206
1207    #[test]
1208    fn search_entry_serialization_roundtrip() -> Result<()> {
1209        // Arrange
1210        let entry = SearchEntry {
1211            title: "Roundtrip Test".into(),
1212            url: "/roundtrip/index.html".into(),
1213            content: "Some searchable content here".into(),
1214            headings: vec!["Introduction".into(), "Details".into()],
1215        };
1216
1217        // Act
1218        let json = serde_json::to_string(&entry)?;
1219        let deserialized: SearchEntry = serde_json::from_str(&json)?;
1220
1221        // Assert
1222        assert_eq!(entry, deserialized);
1223        assert_eq!(deserialized.title, "Roundtrip Test");
1224        assert_eq!(deserialized.headings.len(), 2);
1225        Ok(())
1226    }
1227
1228    #[test]
1229    fn search_index_multiple_headings() -> Result<()> {
1230        // Arrange
1231        let tmp = tempdir()?;
1232        let html = "\
1233            <html><head><title>Multi Heading</title></head><body>\
1234            <h1>Main Title</h1>\
1235            <h2>Section A</h2>\
1236            <p>Content A</p>\
1237            <h3>Subsection A1</h3>\
1238            <p>Content A1</p>\
1239            </body></html>";
1240        fs::write(tmp.path().join("headings.html"), html)?;
1241
1242        // Act
1243        let index = SearchIndex::build(tmp.path())?;
1244
1245        // Assert
1246        assert_eq!(index.len(), 1);
1247        let entry = &index.entries[0];
1248        assert!(entry.headings.contains(&"Main Title".to_string()));
1249        assert!(entry.headings.contains(&"Section A".to_string()));
1250        assert!(entry.headings.contains(&"Subsection A1".to_string()));
1251        assert_eq!(entry.headings.len(), 3);
1252        Ok(())
1253    }
1254
1255    #[test]
1256    fn search_index_nested_directories_deep() -> Result<()> {
1257        // Arrange
1258        let tmp = tempdir()?;
1259        fs::create_dir_all(tmp.path().join("docs/guide/advanced"))?;
1260        fs::write(
1261            tmp.path().join("index.html"),
1262            make_html("Root", "<p>Root page</p>"),
1263        )?;
1264        fs::write(
1265            tmp.path().join("docs/overview.html"),
1266            make_html("Docs", "<p>Docs overview</p>"),
1267        )?;
1268        fs::write(
1269            tmp.path().join("docs/guide/advanced/tips.html"),
1270            make_html("Tips", "<p>Advanced tips</p>"),
1271        )?;
1272
1273        // Act
1274        let index = SearchIndex::build(tmp.path())?;
1275
1276        // Assert
1277        assert_eq!(index.len(), 3);
1278        let urls: Vec<&str> =
1279            index.entries.iter().map(|e| e.url.as_str()).collect();
1280        assert!(urls.iter().any(|u| u.contains("docs/guide/advanced")));
1281        assert!(urls.iter().any(|u| u.contains("index.html")));
1282        Ok(())
1283    }
1284
1285    // -----------------------------------------------------------------
1286    // SearchIndex::build — parallel path with multiple HTML files
1287    // -----------------------------------------------------------------
1288
1289    #[test]
1290    fn search_index_build_parallel_with_many_files() -> Result<()> {
1291        let tmp = tempdir()?;
1292        for i in 0..10 {
1293            fs::write(
1294                tmp.path().join(format!("page{i}.html")),
1295                make_html(
1296                    &format!("Page {i}"),
1297                    &format!("<p>Content for page {i}</p>"),
1298                ),
1299            )?;
1300        }
1301
1302        let index = SearchIndex::build(tmp.path())?;
1303        assert_eq!(index.len(), 10);
1304
1305        // Verify all pages are indexed
1306        for i in 0..10 {
1307            let title = format!("Page {i}");
1308            assert!(
1309                index.entries.iter().any(|e| e.title == title),
1310                "missing entry for {title}"
1311            );
1312        }
1313        Ok(())
1314    }
1315
1316    // -----------------------------------------------------------------
1317    // extract_headings — h1 through h6
1318    // -----------------------------------------------------------------
1319
1320    #[test]
1321    fn extract_headings_all_levels() {
1322        let html = "\
1323            <h1>One</h1>\
1324            <h2>Two</h2>\
1325            <h3>Three</h3>\
1326            <h4>Four</h4>\
1327            <h5>Five</h5>\
1328            <h6>Six</h6>";
1329        let h = extract_headings(html);
1330        assert_eq!(h, vec!["One", "Two", "Three", "Four", "Five", "Six"]);
1331    }
1332
1333    #[test]
1334    fn extract_headings_empty_heading_skipped() {
1335        let html = "<h1></h1><h2>Real Heading</h2>";
1336        let h = extract_headings(html);
1337        assert_eq!(h, vec!["Real Heading"]);
1338    }
1339
1340    // -----------------------------------------------------------------
1341    // truncate — word boundary and short content
1342    // -----------------------------------------------------------------
1343
1344    #[test]
1345    fn truncate_at_word_boundary_exact() {
1346        // truncate(s, 13) takes first 13 chars "one two three"
1347        // then finds last space at position 7, truncating to "one two"
1348        let result = truncate("one two three four five", 13);
1349        assert_eq!(result, "one two");
1350    }
1351
1352    #[test]
1353    fn truncate_content_shorter_than_limit() {
1354        let input = "short text";
1355        assert_eq!(truncate(input, 1000), "short text");
1356    }
1357
1358    #[test]
1359    fn truncate_exact_length_returns_unchanged() {
1360        let input = "exact";
1361        assert_eq!(truncate(input, 5), "exact");
1362    }
1363
1364    // -----------------------------------------------------------------
1365    // SearchLabels::for_locale
1366    // -----------------------------------------------------------------
1367
1368    #[test]
1369    fn search_labels_for_locale_french() {
1370        let labels = SearchLabels::for_locale("fr");
1371        assert_eq!(labels.button_text, "Rechercher");
1372        assert!(labels.input_placeholder.contains("Rechercher"));
1373        assert_eq!(labels.footer_close, "fermer");
1374    }
1375
1376    #[test]
1377    fn search_labels_for_locale_german() {
1378        let labels = SearchLabels::for_locale("de");
1379        assert_eq!(labels.button_text, "Suchen");
1380        assert_eq!(labels.footer_open, "\u{f6}ffnen"); // öffnen
1381    }
1382
1383    #[test]
1384    fn search_labels_for_locale_unknown_falls_back_to_english() {
1385        let labels = SearchLabels::for_locale("xx");
1386        assert_eq!(labels.button_text, "Search");
1387        assert!(labels.input_placeholder.contains("Search"));
1388        assert_eq!(labels.footer_close, "close");
1389    }
1390
1391    #[test]
1392    fn search_labels_for_locale_case_insensitive() {
1393        let labels = SearchLabels::for_locale("FR");
1394        assert_eq!(labels.button_text, "Rechercher");
1395    }
1396
1397    #[test]
1398    fn search_labels_for_locale_zh_tw() {
1399        let labels = SearchLabels::for_locale("zh-tw");
1400        assert_eq!(labels.button_text, "搜尋");
1401    }
1402
1403    #[test]
1404    fn search_labels_default_is_english() {
1405        let labels = SearchLabels::default();
1406        assert_eq!(labels.button_text, "Search");
1407    }
1408
1409    #[test]
1410    fn search_labels_english_constructor() {
1411        let labels = SearchLabels::english();
1412        assert_eq!(labels.button_text, "Search");
1413        assert_eq!(
1414            SearchLabels::english().input_placeholder,
1415            labels.input_placeholder
1416        );
1417    }
1418
1419    #[test]
1420    fn search_labels_french_constructor() {
1421        let labels = SearchLabels::french();
1422        assert_eq!(labels.button_text, "Rechercher");
1423    }
1424
1425    #[test]
1426    fn localized_search_plugin_new_keeps_supplied_labels() {
1427        let labels = SearchLabels::french();
1428        let p = LocalizedSearchPlugin::new(labels.clone());
1429        assert_eq!(p.labels.button_text, "Rechercher");
1430    }
1431
1432    #[test]
1433    fn localized_search_plugin_name_is_search() {
1434        let p = LocalizedSearchPlugin::new(SearchLabels::default());
1435        assert_eq!(p.name(), "search");
1436    }
1437
1438    #[test]
1439    fn localized_search_plugin_no_op_when_site_missing() -> Result<()> {
1440        let dir = tempdir().unwrap();
1441        let nope = dir.path().join("nope");
1442        let ctx = PluginContext::new(
1443            Path::new("c"),
1444            Path::new("b"),
1445            &nope,
1446            Path::new("t"),
1447        );
1448        LocalizedSearchPlugin::new(SearchLabels::default())
1449            .after_compile(&ctx)?;
1450        Ok(())
1451    }
1452
1453    #[test]
1454    fn localized_search_plugin_writes_index_with_localized_labels() -> Result<()>
1455    {
1456        let dir = tempdir().unwrap();
1457        let html_content =
1458            "<html><head><title>P</title></head><body>x</body></html>";
1459        fs::write(dir.path().join("page.html"), html_content)?;
1460        let ctx = PluginContext::new(
1461            Path::new("c"),
1462            Path::new("b"),
1463            dir.path(),
1464            Path::new("t"),
1465        );
1466        let plugin = LocalizedSearchPlugin::new(SearchLabels::french());
1467        plugin.after_compile(&ctx)?;
1468        let output = plugin.transform_html(
1469            html_content,
1470            &dir.path().join("page.html"),
1471            &ctx,
1472        )?;
1473        // Localized button text should appear in the injected widget.
1474        assert!(
1475            output.contains("Rechercher"),
1476            "French label 'Rechercher' should appear in injected UI"
1477        );
1478        Ok(())
1479    }
1480}
1481
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(1000))]

        /// Property: whatever string the `\PC*` strategy generates, the
        /// output of `strip_tags` contains neither `<` nor `>`.
        #[test]
        fn strip_tags_no_angle_brackets(input in "\\PC*") {
            let cleaned = strip_tags(&input);
            let bracket_free =
                !cleaned.chars().any(|c| matches!(c, '<' | '>'));
            prop_assert!(
                bracket_free,
                "angle brackets survived strip_tags: {:?}", cleaned,
            );
        }
    }
}