Skip to main content

ssg/
csp.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Content Security Policy hardening plugin.
5//!
6//! Extracts inline `<style>` and `<script>` blocks into external files
7//! with Subresource Integrity (SRI) hashes, eliminating the need for
8//! `'unsafe-inline'` in the Content-Security-Policy header.
9
10use crate::plugin::{Plugin, PluginContext};
11use anyhow::Result;
12use std::{fs, path::Path};
13
14/// Plugin that extracts inline styles/scripts to external files with SRI.
15///
16/// Runs in `after_compile` after all other content transforms but before
17/// minification. For each HTML file:
18///
19/// 1. Finds `<style>…</style>` and `<script>…</script>` inline blocks
20/// 2. Writes each block to `_csp/<hash>.css` or `_csp/<hash>.js`
21/// 3. Replaces the inline block with a `<link>`/`<script src>` tag
22///    including `integrity` and `crossorigin` attributes
23/// 4. Rewrites any `<meta>` CSP tags to remove `'unsafe-inline'`
24///
25/// Blocks with `type="application/ld+json"` or `data-ssg-livereload`
26/// attributes are skipped (structured data / dev-only scripts).
27#[derive(Debug, Clone, Copy)]
28pub struct CspPlugin;
29
30impl Plugin for CspPlugin {
31    fn name(&self) -> &'static str {
32        "csp"
33    }
34
35    fn has_transform(&self) -> bool {
36        true
37    }
38
39    fn transform_html(
40        &self,
41        html: &str,
42        _path: &Path,
43        ctx: &PluginContext,
44    ) -> Result<String> {
45        let csp_dir = ctx.site_dir.join("_csp");
46        let (rewritten, extracted) =
47            extract_inline_blocks(html, &csp_dir, &ctx.site_dir)?;
48
49        if extracted > 0 {
50            let final_html = remove_unsafe_inline_from_csp(&rewritten);
51            Ok(final_html)
52        } else {
53            Ok(html.to_string())
54        }
55    }
56
57    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
58        if !ctx.site_dir.exists() {
59            return Ok(());
60        }
61
62        // Pre-create _csp/ dir so transform_html writers have the directory
63        let csp_dir = ctx.site_dir.join("_csp");
64        fs::create_dir_all(&csp_dir)?;
65
66        Ok(())
67    }
68}
69
70/// Extracts inline `<style>` and `<script>` blocks from HTML.
71///
72/// Returns `(rewritten_html, count_of_extracted_blocks)`.
73fn extract_inline_blocks(
74    html: &str,
75    csp_dir: &Path,
76    site_dir: &Path,
77) -> Result<(String, usize)> {
78    let mut result = html.to_string();
79    let mut count = 0;
80
81    // Extract <style>…</style> blocks
82    while let Some((before, content, after)) =
83        find_inline_block(&result, "style")
84    {
85        let hash = fnv_hash(content.as_bytes());
86        let filename = format!("{hash:016x}.css");
87        let file_path = csp_dir.join(&filename);
88
89        fs::create_dir_all(csp_dir)?;
90        fs::write(&file_path, content.as_bytes())?;
91
92        let sri = compute_sri(content.as_bytes());
93        let rel_path = file_path
94            .strip_prefix(site_dir)
95            .unwrap_or(&file_path)
96            .to_string_lossy()
97            .replace('\\', "/");
98
99        let link_tag = format!(
100            "<link rel=\"stylesheet\" href=\"/{}\" integrity=\"{}\" crossorigin=\"anonymous\">",
101            rel_path, sri
102        );
103
104        result = format!("{before}{link_tag}{after}");
105        count += 1;
106    }
107
108    // Extract <script>…</script> blocks (skip JSON-LD and livereload)
109    while let Some((before, content, after)) = find_inline_script(&result) {
110        let hash = fnv_hash(content.as_bytes());
111        let filename = format!("{hash:016x}.js");
112        let file_path = csp_dir.join(&filename);
113
114        fs::create_dir_all(csp_dir)?;
115        fs::write(&file_path, content.as_bytes())?;
116
117        let sri = compute_sri(content.as_bytes());
118        let rel_path = file_path
119            .strip_prefix(site_dir)
120            .unwrap_or(&file_path)
121            .to_string_lossy()
122            .replace('\\', "/");
123
124        let script_tag = format!(
125            "<script src=\"/{}\" integrity=\"{}\" crossorigin=\"anonymous\"></script>",
126            rel_path, sri
127        );
128
129        result = format!("{before}{script_tag}{after}");
130        count += 1;
131    }
132
133    Ok((result, count))
134}
135
/// Finds the first non-empty `<tag>…</tag>` block (attribute-less opening
/// tag only) and returns `(html_before, block_content, html_after)`.
///
/// Blocks whose content is empty or whitespace-only are skipped and the
/// search continues after them, so an empty block early in the document
/// does not hide later blocks that still need extracting.
///
/// Returns `None` when no non-empty block exists, or when an opening tag
/// has no matching closing tag.
fn find_inline_block<'a>(
    html: &'a str,
    tag: &str,
) -> Option<(&'a str, &'a str, &'a str)> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");

    let mut search_from = 0;
    loop {
        let start = search_from + html[search_from..].find(open.as_str())?;
        let content_start = start + open.len();
        let content_end =
            content_start + html[content_start..].find(close.as_str())?;
        let end = content_end + close.len();

        let content = &html[content_start..content_end];
        if content.trim().is_empty() {
            // Nothing worth extracting here; keep looking past this block.
            search_from = end;
            continue;
        }

        return Some((&html[..start], content, &html[end..]));
    }
}
157
/// Locates the first extractable inline `<script>` element and returns
/// owned copies of `(html_before, script_content, html_after)`.
///
/// A script is passed over when its opening tag contains any of:
/// - `application/ld+json` (structured data)
/// - `data-ssg-livereload` (dev-only)
/// - `src=` (already external)
///
/// Scripts whose body is empty or whitespace-only are passed over as well.
/// Returns `None` when no candidate remains, or when an opening tag is
/// missing its `>` or a candidate has no `</script>`.
fn find_inline_script(html: &str) -> Option<(String, String, String)> {
    const OPEN_PREFIX: &str = "<script";
    const CLOSE_TAG: &str = "</script>";
    const SKIP_MARKERS: [&str; 3] =
        ["application/ld+json", "data-ssg-livereload", "src="];

    let mut cursor = 0;

    while let Some(offset) = html[cursor..].find(OPEN_PREFIX) {
        let tag_open = cursor + offset;
        // Index of the `>` that terminates the opening tag.
        let tag_close = tag_open + html[tag_open..].find('>')?;
        let opening = &html[tag_open..=tag_close];
        let body_start = tag_close + 1;

        if SKIP_MARKERS.iter().any(|marker| opening.contains(*marker)) {
            cursor = body_start;
            continue;
        }

        let body_end = body_start + html[body_start..].find(CLOSE_TAG)?;
        let block_end = body_end + CLOSE_TAG.len();
        let body = &html[body_start..body_end];

        if body.trim().is_empty() {
            cursor = block_end;
            continue;
        }

        return Some((
            html[..tag_open].to_string(),
            body.to_string(),
            html[block_end..].to_string(),
        ));
    }

    None
}
202
/// Removes `'unsafe-inline'` from `<meta>` tags in HTML.
///
/// Only text inside a `<meta …>` tag (matched case-insensitively) is
/// rewritten. Everything else — body text, code samples, attributes of other
/// elements — is copied through untouched, so a page that merely *mentions*
/// `'unsafe-inline'` is not corrupted. Within a rewritten tag, a doubled
/// space left in front of a `;` by the removal is collapsed to one.
fn remove_unsafe_inline_from_csp(html: &str) -> String {
    const TOKEN: &str = "'unsafe-inline'";
    const META_OPEN: &[u8] = b"<meta";

    // Fast path: nothing to strip anywhere in the document.
    if !html.contains(TOKEN) {
        return html.to_string();
    }

    let mut out = String::with_capacity(html.len());
    let mut rest = html;

    loop {
        // Find the next `<meta` that is followed by whitespace, `/` or `>`,
        // so that elements such as `<metadata>` are not mistaken for it.
        let meta_start =
            rest.as_bytes().windows(META_OPEN.len() + 1).position(|window| {
                let (name, next) = window.split_at(META_OPEN.len());
                name.eq_ignore_ascii_case(META_OPEN)
                    && next.iter().all(|b| {
                        b.is_ascii_whitespace() || *b == b'/' || *b == b'>'
                    })
            });

        let start = match meta_start {
            Some(index) => index,
            None => break,
        };
        // An unterminated tag is left as-is along with the rest of the input.
        let tag_len = match rest[start..].find('>') {
            Some(index) => index + 1,
            None => break,
        };

        // `start` points at an ASCII `<` and the tag ends on an ASCII `>`,
        // so both split points are valid char boundaries.
        let (before, from_tag) = rest.split_at(start);
        let (tag, after) = from_tag.split_at(tag_len);

        out.push_str(before);
        if tag.contains(TOKEN) {
            out.push_str(&tag.replace(TOKEN, "").replace("  ;", " ;"));
        } else {
            out.push_str(tag);
        }
        rest = after;
    }

    out.push_str(rest);
    out
}
207
/// 64-bit FNV-1a hash of `data`.
///
/// Starts from the FNV offset basis and, for each byte, XORs the byte into
/// the accumulator and then multiplies by the FNV prime (wrapping on
/// overflow). Empty input yields the offset basis itself.
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    data.iter().fold(OFFSET_BASIS, |acc, &byte| {
        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
217
/// Computes a Subresource Integrity value for `data`: `sha256-<base64>`.
///
/// The value is the standard base64 encoding (with `=` padding) of the
/// SHA-256 digest of `data`, which is the form browsers compare against
/// when validating an `integrity` attribute. A non-cryptographic hash or a
/// hex encoding would never match and the browser would refuse to load the
/// resource.
fn compute_sri(data: &[u8]) -> String {
    format!("sha256-{}", base64_encode(&sha256_digest(data)))
}

/// SHA-256 digest of `data` (FIPS 180-4), implemented with std only.
fn sha256_digest(data: &[u8]) -> [u8; 32] {
    // Round constants: first 32 bits of the fractional parts of the cube
    // roots of the first 64 primes.
    const K: [u32; 64] = [
        0x428a_2f98, 0x7137_4491, 0xb5c0_fbcf, 0xe9b5_dba5,
        0x3956_c25b, 0x59f1_11f1, 0x923f_82a4, 0xab1c_5ed5,
        0xd807_aa98, 0x1283_5b01, 0x2431_85be, 0x550c_7dc3,
        0x72be_5d74, 0x80de_b1fe, 0x9bdc_06a7, 0xc19b_f174,
        0xe49b_69c1, 0xefbe_4786, 0x0fc1_9dc6, 0x240c_a1cc,
        0x2de9_2c6f, 0x4a74_84aa, 0x5cb0_a9dc, 0x76f9_88da,
        0x983e_5152, 0xa831_c66d, 0xb003_27c8, 0xbf59_7fc7,
        0xc6e0_0bf3, 0xd5a7_9147, 0x06ca_6351, 0x1429_2967,
        0x27b7_0a85, 0x2e1b_2138, 0x4d2c_6dfc, 0x5338_0d13,
        0x650a_7354, 0x766a_0abb, 0x81c2_c92e, 0x9272_2c85,
        0xa2bf_e8a1, 0xa81a_664b, 0xc24b_8b70, 0xc76c_51a3,
        0xd192_e819, 0xd699_0624, 0xf40e_3585, 0x106a_a070,
        0x19a4_c116, 0x1e37_6c08, 0x2748_774c, 0x34b0_bcb5,
        0x391c_0cb3, 0x4ed8_aa4a, 0x5b9c_ca4f, 0x682e_6ff3,
        0x748f_82ee, 0x78a5_636f, 0x84c8_7814, 0x8cc7_0208,
        0x90be_fffa, 0xa450_6ceb, 0xbef9_a3f7, 0xc671_78f2,
    ];

    // Initial hash state: fractional parts of the square roots of the
    // first 8 primes.
    let mut state: [u32; 8] = [
        0x6a09_e667, 0xbb67_ae85, 0x3c6e_f372, 0xa54f_f53a,
        0x510e_527f, 0x9b05_688c, 0x1f83_d9ab, 0x5be0_cd19,
    ];

    // Padding: 0x80, zeros up to 56 mod 64, then the bit length as a
    // big-endian u64. `usize` never exceeds 64 bits on supported targets.
    let bit_len = (data.len() as u64).wrapping_mul(8);
    let padded_len = (data.len() + 9 + 63) / 64 * 64;
    let mut message = Vec::with_capacity(padded_len);
    message.extend_from_slice(data);
    message.push(0x80);
    message.resize(padded_len - 8, 0);
    message.extend_from_slice(&bit_len.to_be_bytes());

    for block in message.chunks_exact(64) {
        // Message schedule.
        let mut w = [0u32; 64];
        for (slot, bytes) in w.iter_mut().zip(block.chunks_exact(4)) {
            *slot = u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
        }
        for i in 16..64 {
            let s0 = w[i - 15].rotate_right(7)
                ^ w[i - 15].rotate_right(18)
                ^ (w[i - 15] >> 3);
            let s1 = w[i - 2].rotate_right(17)
                ^ w[i - 2].rotate_right(19)
                ^ (w[i - 2] >> 10);
            w[i] = w[i - 16]
                .wrapping_add(s0)
                .wrapping_add(w[i - 7])
                .wrapping_add(s1);
        }

        // Compression.
        let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = state;
        for i in 0..64 {
            let big_s1 =
                e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
            let choose = (e & f) ^ (!e & g);
            let t1 = h
                .wrapping_add(big_s1)
                .wrapping_add(choose)
                .wrapping_add(K[i])
                .wrapping_add(w[i]);
            let big_s0 =
                a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
            let majority = (a & b) ^ (a & c) ^ (b & c);
            let t2 = big_s0.wrapping_add(majority);

            h = g;
            g = f;
            f = e;
            e = d.wrapping_add(t1);
            d = c;
            c = b;
            b = a;
            a = t1.wrapping_add(t2);
        }

        let working = [a, b, c, d, e, f, g, h];
        for (acc, add) in state.iter_mut().zip(working.iter()) {
            *acc = acc.wrapping_add(*add);
        }
    }

    let mut digest = [0u8; 32];
    for (chunk, word) in digest.chunks_exact_mut(4).zip(state.iter()) {
        chunk.copy_from_slice(&word.to_be_bytes());
    }
    digest
}

/// Standard (RFC 4648) base64 encoding with `=` padding.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    let mut out = String::with_capacity((bytes.len() + 2) / 3 * 4);
    for chunk in bytes.chunks(3) {
        let b0 = chunk.first().copied().unwrap_or(0);
        let b1 = chunk.get(1).copied().unwrap_or(0);
        let b2 = chunk.get(2).copied().unwrap_or(0);
        let group =
            (u32::from(b0) << 16) | (u32::from(b1) << 8) | u32::from(b2);
        let sextet = |shift: u32| {
            char::from(ALPHABET[((group >> shift) & 0x3f) as usize])
        };

        out.push(sextet(18));
        out.push(sextet(12));
        out.push(if chunk.len() > 1 { sextet(6) } else { '=' });
        out.push(if chunk.len() > 2 { sextet(0) } else { '=' });
    }
    out
}
223
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Runs `extract_inline_blocks` against a throwaway site directory and
    /// returns `(rewritten_html, extracted_count)`.
    fn extract(html: &str) -> (String, usize) {
        let site = tempdir().unwrap();
        let csp_dir = site.path().join("_csp");
        extract_inline_blocks(html, &csp_dir, site.path()).unwrap()
    }

    #[test]
    fn extract_style_block() {
        let (result, count) = extract(
            "<html><head><style>body { color: red; }</style></head><body></body></html>",
        );

        assert_eq!(count, 1);
        assert!(result.contains("<link rel=\"stylesheet\""));
        assert!(result.contains("integrity="));
        assert!(!result.contains("<style>"));
    }

    #[test]
    fn extract_script_block() {
        let (result, count) = extract(
            "<html><body><script>console.log('hi');</script></body></html>",
        );

        assert_eq!(count, 1);
        assert!(result.contains("<script src="));
        assert!(result.contains("integrity="));
        assert!(!result.contains("console.log"));
    }

    #[test]
    fn skips_jsonld_scripts() {
        let (result, count) = extract(
            r#"<html><body><script type="application/ld+json">{"@type":"Thing"}</script></body></html>"#,
        );

        assert_eq!(count, 0);
        assert!(result.contains("application/ld+json"));
    }

    #[test]
    fn skips_livereload_scripts() {
        let (result, count) = extract(
            r#"<html><body><script data-ssg-livereload>ws.connect();</script></body></html>"#,
        );

        assert_eq!(count, 0);
        assert!(result.contains("data-ssg-livereload"));
    }

    #[test]
    fn skips_external_scripts() {
        let html =
            r#"<html><body><script src="/app.js"></script></body></html>"#;
        let (result, count) = extract(html);

        assert_eq!(count, 0);
        assert_eq!(result, html);
    }

    #[test]
    fn removes_unsafe_inline_from_csp() {
        let cleaned = remove_unsafe_inline_from_csp(
            r#"<meta content="script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'">"#,
        );
        assert!(!cleaned.contains("unsafe-inline"));
    }

    #[test]
    fn skips_empty_style_blocks() {
        let (_, count) =
            extract("<html><head><style>  </style></head></html>");
        assert_eq!(count, 0);
    }

    #[test]
    fn csp_plugin_name() {
        assert_eq!(CspPlugin.name(), "csp");
    }

    #[test]
    fn csp_plugin_skips_missing_site_dir() {
        let ctx = PluginContext::new(
            Path::new("/tmp/c"),
            Path::new("/tmp/b"),
            Path::new("/nonexistent/site"),
            Path::new("/tmp/t"),
        );
        assert!(CspPlugin.after_compile(&ctx).is_ok());
    }

    #[test]
    fn csp_plugin_processes_html_files() {
        let root = tempdir().unwrap();
        let site = root.path().join("site");
        fs::create_dir_all(&site).unwrap();
        let page = "<html><head><style>body{color:red}</style></head><body><script>alert(1)</script></body></html>";

        let ctx =
            PluginContext::new(root.path(), root.path(), &site, root.path());
        CspPlugin.after_compile(&ctx).unwrap();

        let output = CspPlugin
            .transform_html(page, &site.join("index.html"), &ctx)
            .unwrap();

        // Both inline blocks must be replaced by external references.
        assert!(output.contains("<link rel=\"stylesheet\""));
        assert!(output.contains("<script src="));
        assert!(!output.contains("body{color:red}"));
        assert!(!output.contains("alert(1)"));
        assert!(site.join("_csp").exists());
    }

    #[test]
    fn fnv_hash_deterministic() {
        assert_eq!(fnv_hash(b"hello"), fnv_hash(b"hello"));
    }

    #[test]
    fn fnv_hash_different_inputs() {
        assert_ne!(fnv_hash(b"a"), fnv_hash(b"b"));
    }

    #[test]
    fn compute_sri_format() {
        assert!(compute_sri(b"test").starts_with("sha256-"));
    }
}