1use super::helpers::escape_attr;
7use crate::plugin::{Plugin, PluginContext};
8use anyhow::Result;
9use std::path::Path;
10
/// Plugin that gives each generated page a `<link rel="canonical">` element.
///
/// The canonical URL of a page is the configured base URL joined with the
/// page's path relative to the site directory.
#[derive(Clone, Debug)]
pub struct CanonicalPlugin {
    /// Base URL every canonical link is built on; stored exactly as passed
    /// to the constructor (trailing slashes are trimmed at use time).
    base_url: String,
}
33
34impl CanonicalPlugin {
35 pub fn new(base_url: impl Into<String>) -> Self {
37 Self {
38 base_url: base_url.into(),
39 }
40 }
41}
42
43impl Plugin for CanonicalPlugin {
44 fn name(&self) -> &'static str {
45 "canonical"
46 }
47
48 fn has_transform(&self) -> bool {
49 true
50 }
51
52 fn transform_html(
53 &self,
54 html: &str,
55 path: &Path,
56 ctx: &PluginContext,
57 ) -> Result<String> {
58 let base = self.base_url.trim_end_matches('/');
59
60 let rel_path = path
61 .strip_prefix(&ctx.site_dir)
62 .unwrap_or(path)
63 .to_string_lossy()
64 .replace('\\', "/");
65
66 let tag = build_canonical_tag(base, &rel_path);
67
68 let mut result = remove_existing_canonicals(html);
69
70 result = if let Some(pos) = result.find("</head>") {
72 format!("{}{}\n{}", &result[..pos], tag, &result[pos..])
73 } else {
74 result
75 };
76
77 Ok(result)
78 }
79
80 fn after_compile(&self, _ctx: &PluginContext) -> Result<()> {
81 Ok(())
82 }
83}
84
85fn build_canonical_tag(base: &str, rel_path: &str) -> String {
87 let canonical_url = format!("{base}/{rel_path}");
88 format!(
89 "<link rel=\"canonical\" href=\"{}\">",
90 escape_attr(&canonical_url)
91 )
92}
93
/// Strips every `<link>` element carrying `rel=canonical` from `html`.
///
/// The double-quoted, single-quoted and unquoted spellings of the attribute
/// are recognised. A match is removed only when it sits inside the opening
/// tag of a `link` element (tag name compared ASCII-case-insensitively).
/// Occurrences in text content or on other elements are left untouched, so
/// prose that merely mentions `rel="canonical"` is not damaged.
///
/// A line break (`\n` or `\r\n`) directly after a removed tag is removed
/// with it so no blank line is left behind. A tag with no closing `>` is
/// removed through to the end of the input.
///
/// Limitation: a `>` inside an attribute value that precedes the `rel`
/// attribute makes the match look like text content, and it is skipped.
fn remove_existing_canonicals(html: &str) -> String {
    const PATTERNS: [&str; 3] = ["rel=\"canonical\"", "rel='canonical'", "rel=canonical"];

    // Fast path: most pages carry no canonical link at all.
    if !PATTERNS.iter().any(|pat| html.contains(pat)) {
        return html.to_string();
    }

    let mut result = html.to_string();
    for &pat in &PATTERNS {
        let mut search_from = 0;
        while let Some(offset) = result[search_from..].find(pat) {
            let pos = search_from + offset;

            let start = match link_tag_start(&result, pos) {
                Some(start) => start,
                None => {
                    // Not a `<link>` attribute: keep it and look further on.
                    search_from = pos + pat.len();
                    continue;
                }
            };

            let mut end = result[pos..]
                .find('>')
                .map_or(result.len(), |i| pos + i + 1);

            // Swallow the line break that followed the removed tag.
            let tail = &result.as_bytes()[end..];
            if tail.starts_with(b"\r\n") {
                end += 2;
            } else if tail.starts_with(b"\n") {
                end += 1;
            }

            result.replace_range(start..end, "");
            // Whatever followed the removed tag now begins at `start`.
            search_from = start;
        }
    }
    result
}

/// Returns the offset of the `<` opening the `link` tag that contains byte
/// offset `pos`, or `None` when `pos` is not inside a `link` opening tag.
fn link_tag_start(html: &str, pos: usize) -> Option<usize> {
    let start = html[..pos].rfind('<')?;
    let inside = &html[start + 1..pos];

    // A `>` between the `<` and the match means that tag already ended and
    // the match lies in text content.
    if inside.contains('>') {
        return None;
    }

    let name_len = inside
        .find(|c: char| !c.is_ascii_alphanumeric())
        .unwrap_or(inside.len());

    if inside[..name_len].eq_ignore_ascii_case("link") {
        Some(start)
    } else {
        None
    }
}
120
/// Unit tests for the canonical-link plugin and its private helpers.
#[cfg(test)]
// Tests may unwrap freely: a panic is the desired failure mode here.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::plugin::PluginContext;
    use std::path::Path;
    use tempfile::tempdir;

    /// Builds a context whose site directory is `site`; the other
    /// directories are placeholder relative paths.
    fn ctx(site: &Path) -> PluginContext {
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site,
            Path::new("templates"),
        )
    }

    #[test]
    fn name_is_stable() {
        assert_eq!(CanonicalPlugin::new("https://x").name(), "canonical");
    }

    #[test]
    fn new_accepts_string_or_str() {
        let _ = CanonicalPlugin::new("https://a");
        let _ = CanonicalPlugin::new(String::from("https://b"));
    }

    #[test]
    fn no_op_when_site_dir_missing() {
        let dir = tempdir().unwrap();
        CanonicalPlugin::new("https://x")
            .after_compile(&ctx(&dir.path().join("nope")))
            .unwrap();
    }

    #[test]
    fn build_canonical_tag_joins_base_and_rel_path() {
        let tag = build_canonical_tag("https://example.com", "blog/post.html");
        assert_eq!(
            tag,
            r#"<link rel="canonical" href="https://example.com/blog/post.html">"#
        );
    }

    #[test]
    fn build_canonical_tag_escapes_href_attribute_value() {
        let tag = build_canonical_tag("https://example.com", "x?a=1&b=2");
        // Checking for a bare `&` would pass even without escaping, so the
        // escaped entity itself is required and the raw form is rejected.
        assert!(
            tag.contains("&amp;"),
            "ampersand in URL must be HTML-escaped: {tag}"
        );
        assert!(
            !tag.contains("&b=2"),
            "raw ampersand must not survive in href: {tag}"
        );
    }

    #[test]
    fn remove_existing_canonicals_no_op_when_none_present() {
        let html = "<head><title>x</title></head>";
        assert_eq!(remove_existing_canonicals(html), html);
    }

    #[test]
    fn remove_existing_canonicals_strips_double_quoted() {
        let html = r#"<head><link rel="canonical" href="/old"><title>x</title></head>"#;
        let out = remove_existing_canonicals(html);
        assert!(!out.contains("rel=\"canonical\""));
        assert!(out.contains("<title>x</title>"));
    }

    #[test]
    fn remove_existing_canonicals_strips_single_quoted() {
        let html = "<head><link rel='canonical' href='/old'></head>";
        let out = remove_existing_canonicals(html);
        assert!(!out.contains("rel='canonical'"));
    }

    #[test]
    fn remove_existing_canonicals_strips_unquoted() {
        let html = "<head><link rel=canonical href=/old></head>";
        let out = remove_existing_canonicals(html);
        assert!(!out.contains("rel=canonical"));
    }

    #[test]
    fn remove_existing_canonicals_strips_multiple() {
        let html = r#"<head>
    <link rel="canonical" href="/a">
    <link rel="canonical" href="/b">
</head>"#;
        let out = remove_existing_canonicals(html);
        assert!(!out.contains("rel=\"canonical\""));
    }

    #[test]
    fn transform_html_injects_canonical() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let html = "<html><head></head><body></body></html>";
        let page_path = dir.path().join("page.html");
        let after = CanonicalPlugin::new("https://example.com")
            .transform_html(html, &page_path, &c)
            .unwrap();
        assert!(
            after.contains(r#"<link rel="canonical""#),
            "canonical link should be injected: {after}"
        );
    }

    #[test]
    fn transform_html_replaces_existing_canonical_with_correct_one() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let html =
            r#"<html><head><link rel="canonical" href="/wrong"></head></html>"#;
        let page_path = dir.path().join("page.html");
        let after = CanonicalPlugin::new("https://example.com")
            .transform_html(html, &page_path, &c)
            .unwrap();
        assert!(
            after.contains("https://example.com"),
            "wrong canonical replaced with correct: {after}"
        );
        assert!(
            !after.contains("/wrong"),
            "old canonical should be gone: {after}"
        );
    }

    #[test]
    fn transform_html_trims_trailing_slash_on_base_url() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let html = "<html><head></head></html>";
        let page_path = dir.path().join("page.html");
        let after = CanonicalPlugin::new("https://example.com/")
            .transform_html(html, &page_path, &c)
            .unwrap();
        assert!(
            !after.contains("com//page.html"),
            "no double-slash after trim: {after}"
        );
        assert!(
            after.contains("https://example.com/page.html"),
            "single slash joins base and page: {after}"
        );
    }

    #[test]
    fn transform_html_handles_html_without_head_tag() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let raw = "<!doctype html><html><body>only</body></html>";
        let page_path = dir.path().join("frag.html");
        let after = CanonicalPlugin::new("https://example.com")
            .transform_html(raw, &page_path, &c)
            .unwrap();
        assert_eq!(after, raw);
    }
}