Skip to main content

ssg/cmd/
validation.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! URL and path validation utilities.
5
6use super::error::CliError;
7use super::RESERVED_NAMES;
8use std::fs;
9use std::path::Path;
10
/// Returns `true` if `s` looks like a valid HTTP(S) URL.
///
/// This is a lightweight syntactic check, not a full URL parser. A string
/// is accepted when all of the following hold:
///
/// * it starts with `https://` or `http://` (matched case-sensitively);
/// * the host part of the authority contains at least one `.`;
/// * if a non-empty port follows the last `:` of the authority, it consists
///   of ASCII digits only and fits in a `u16`.
///
/// The authority is the text between the scheme and the first `/`, `?` or
/// `#`, so dots or colons that appear only in the path, query or fragment
/// do not influence the result.
pub fn is_valid_url(s: &str) -> bool {
    let Some(rest) = s
        .strip_prefix("https://")
        .or_else(|| s.strip_prefix("http://"))
    else {
        return false;
    };

    // The authority ends at the first path, query or fragment delimiter.
    // `split` always yields at least one item, so the fallback is never hit.
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or(rest);

    // Separate an optional port (text after the last ':') from the host.
    let (host, port) = match authority.rsplit_once(':') {
        Some((host, port)) => (host, Some(port)),
        None => (authority, None),
    };

    // Must have a dot in the host portion
    if !host.contains('.') {
        return false;
    }

    match port {
        // No port, or a trailing ':' with nothing after it, is tolerated.
        None | Some("") => true,
        // `parse::<u16>` alone would accept a leading '+', so require digits.
        Some(port) => {
            port.bytes().all(|b| b.is_ascii_digit())
                && port.parse::<u16>().is_ok()
        }
    }
}
40
41/// Validates a URL for security and format.
42///
43/// # Examples
44/// ```
45/// use ssg::cmd::validate_url;
46/// assert!(validate_url("http://example.com").is_ok());
47/// assert!(validate_url("javascript:alert(1)").is_err());
48/// ```
49pub fn validate_url(url: &str) -> Result<(), CliError> {
50    let xss_patterns = ["javascript:", "data:", "vbscript:"];
51    if xss_patterns.iter().any(|p| url.contains(p)) {
52        return Err(CliError::InvalidUrl(
53            "URL contains unsafe protocol".into(),
54        ));
55    }
56
57    if url.contains('<') || url.contains('>') || url.contains('"') {
58        return Err(CliError::InvalidUrl(
59            "URL contains invalid characters".into(),
60        ));
61    }
62
63    if !is_valid_url(url) {
64        return Err(CliError::InvalidUrl(url.to_string()));
65    }
66    Ok(())
67}
68
69pub(super) fn validate_path_safety(
70    path: &Path,
71    field: &str,
72) -> Result<(), CliError> {
73    // Check for invalid characters and mixed separators
74    let path_str = path.to_string_lossy();
75
76    // Basic invalid characters
77    let invalid_chars = ["<", ">", "|", "\"", "?", "*"];
78    if invalid_chars.iter().any(|&c| path_str.contains(c)) {
79        return Err(CliError::InvalidPath {
80            field: field.to_string(),
81            details: "Path contains invalid characters".to_string(),
82        });
83    }
84
85    // Check for mixed/invalid path separators (only on non-Windows)
86    #[cfg(not(target_os = "windows"))]
87    if path_str.contains('\\') {
88        return Err(CliError::InvalidPath {
89            field: field.to_string(),
90            details: "Path contains backslashes".to_string(),
91        });
92    }
93
94    // Parent directory traversal check
95    if !path.is_absolute() && path_str.contains("..") {
96        return Err(CliError::InvalidPath {
97            field: field.to_string(),
98            details: "Path contains parent directory traversal".to_string(),
99        });
100    }
101
102    // Check for Windows reserved names
103    if let Some(stem) = path.file_stem() {
104        let stem_lower = stem.to_string_lossy().to_lowercase();
105        if RESERVED_NAMES.contains(&stem_lower.as_str()) {
106            return Err(CliError::InvalidPath {
107                field: field.to_string(),
108                details: format!("Path uses reserved name '{stem_lower}'"),
109            });
110        }
111    }
112
113    // If path exists, check if it's a symlink
114    if path.exists() {
115        fail_point!("cmd::symlink-metadata", |_| {
116            Err(CliError::IoError(std::io::Error::other(
117                "injected: cmd::symlink-metadata",
118            )))
119        });
120        let metadata = fs::symlink_metadata(path).map_err(|_| {
121            CliError::IoError(std::io::Error::other(
122                "Failed to get path metadata",
123            ))
124        })?;
125
126        if metadata.file_type().is_symlink() {
127            return Err(CliError::InvalidPath {
128                field: field.to_string(),
129                details: "Path is a symlink".to_string(),
130            });
131        }
132    }
133
134    Ok(())
135}
136
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    #[cfg(not(target_os = "windows"))]
    use clap::Command;
    use tempfile::tempdir;

    /// Parses a full CLI invocation, then checks one accepted and two
    /// rejected URLs.
    #[test]
    fn test_url_validation() {
        let args = [
            "ssg",
            "--new",
            "dummy_site",
            "--content",
            "dummy_content",
            "--output",
            "dummy_output",
            "--template",
            "dummy_template",
        ];
        let _matches = crate::cmd::Cli::build().get_matches_from(args);

        assert!(validate_url("http://example.com").is_ok());
        for rejected in ["javascript:alert(1)", "https://example.com<script>"]
        {
            assert!(
                validate_url(rejected).is_err(),
                "expected {rejected:?} to be rejected"
            );
        }
    }

    /// An absolute path below the current directory passes.
    #[test]
    fn test_path_safety() {
        let cwd = std::env::current_dir().unwrap();
        let candidate = cwd.join(Path::new("valid"));
        assert!(validate_path_safety(&candidate, "test").is_ok());
    }

    /// Same as above with a different final component.
    #[test]
    fn test_absolute_path_validation() {
        let candidate = std::env::current_dir().unwrap().join("valid_path");
        let outcome = validate_path_safety(&candidate, "test");
        assert!(outcome.is_ok());
    }

    #[cfg(not(target_os = "windows"))] // Unix-specific: path behaviour / error messages differ on Windows
    #[test]
    fn test_path_with_separators() {
        let _matches = Command::new("test_no_required_args")
            .get_matches_from(["test_no_required_args"]);

        let outcome = validate_path_safety(Path::new("path/to\\file"), "test");
        assert!(outcome.is_err(), "Expected error for backslashes");
    }

    /// A symlink to an existing file is rejected with a symlink message.
    #[test]
    fn test_symlink_path_validation() {
        let scratch = tempdir().unwrap();
        let target = scratch.path().join("target");
        let symlink = scratch.path().join("symlink");

        fs::write(&target, "content").unwrap();

        #[cfg(unix)]
        std::os::unix::fs::symlink(&target, &symlink).unwrap();
        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&target, &symlink).unwrap();

        let resolved_path = fs::canonicalize(&symlink).unwrap();
        let normalized_target = fs::canonicalize(&target).unwrap();
        println!("Resolved symlink path: {resolved_path:?}");
        println!("Normalized target path: {normalized_target:?}");

        let outcome = validate_path_safety(&symlink, "symlink");
        assert!(outcome.is_err(), "Expected error for symlink path");
        assert!(matches!(
            outcome,
            Err(CliError::InvalidPath { details, .. }) if details.contains("symlink")
        ));
    }

    /// Scheme-only URLs and an out-of-range port are rejected.
    #[test]
    fn test_url_edge_cases() {
        for rejected in ["http://", "https://", "http://example.com:65536"] {
            assert!(
                validate_url(rejected).is_err(),
                "expected {rejected:?} to be rejected"
            );
        }
    }

    #[test]
    fn test_validate_url_ftp_scheme() {
        let outcome = validate_url("ftp://example.com");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_path_with_invalid_chars() {
        let candidate = Path::new("path<with>invalid");
        assert!(matches!(
            validate_path_safety(candidate, "test"),
            Err(CliError::InvalidPath { .. })
        ));
    }

    #[test]
    fn test_validate_path_with_traversal() {
        let candidate = Path::new("../etc/passwd");
        assert!(matches!(
            validate_path_safety(candidate, "test"),
            Err(CliError::InvalidPath { .. })
        ));
    }

    /// Each reserved stem is rejected on its own.
    #[test]
    fn test_validate_path_with_reserved_name() {
        for reserved in ["con", "aux"] {
            let outcome = validate_path_safety(Path::new(reserved), "test");
            assert!(
                matches!(outcome, Err(CliError::InvalidPath { .. })),
                "expected {reserved:?} to be rejected"
            );
        }
    }

    #[cfg(not(target_os = "windows"))]
    #[test]
    fn test_validate_path_with_backslash() {
        let candidate = Path::new("path\\with\\backslash");
        assert!(matches!(
            validate_path_safety(candidate, "test"),
            Err(CliError::InvalidPath { .. })
        ));
    }

    /// A symlink to an existing directory is rejected.
    #[cfg(unix)]
    #[test]
    fn test_validate_path_existing_symlink() {
        let scratch = tempdir().unwrap();
        let real_dir = scratch.path().join("real");
        let link = scratch.path().join("link");
        fs::create_dir(&real_dir).unwrap();
        std::os::unix::fs::symlink(&real_dir, &link).unwrap();

        assert!(matches!(
            validate_path_safety(&link, "test"),
            Err(CliError::InvalidPath { .. })
        ));
    }

    // =================================================================
    // is_valid_url: edge cases
    // =================================================================

    #[test]
    fn is_valid_url_empty_string() {
        let accepted = is_valid_url("");
        assert!(!accepted);
    }

    #[test]
    fn is_valid_url_no_dot_in_host() {
        let accepted = is_valid_url("http://localhost");
        assert!(!accepted);
    }

    #[test]
    fn is_valid_url_just_scheme() {
        for url in ["http://", "https://"] {
            assert!(!is_valid_url(url), "expected {url:?} to be invalid");
        }
    }

    #[test]
    fn is_valid_url_with_port() {
        for url in ["http://example.com:8080", "https://example.com:443"] {
            assert!(is_valid_url(url), "expected {url:?} to be valid");
        }
    }

    #[test]
    fn is_valid_url_with_path() {
        for url in ["http://example.com/path/to/page", "https://example.com/"]
        {
            assert!(is_valid_url(url), "expected {url:?} to be valid");
        }
    }

    #[test]
    fn is_valid_url_invalid_port() {
        for url in ["http://example.com:99999", "http://example.com:notaport"]
        {
            assert!(!is_valid_url(url), "expected {url:?} to be invalid");
        }
    }

    #[test]
    fn is_valid_url_no_scheme() {
        for url in ["example.com", "ftp://example.com"] {
            assert!(!is_valid_url(url), "expected {url:?} to be invalid");
        }
    }

    // =================================================================
    // validate_url: invalid schemes, missing host
    // =================================================================

    #[test]
    fn validate_url_data_scheme_rejected() {
        let outcome = validate_url("data:text/html,<h1>hi</h1>");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_url_vbscript_scheme_rejected() {
        let outcome = validate_url("vbscript:MsgBox");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_url_missing_host_after_scheme() {
        let outcome = validate_url("http://");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_url_angle_brackets_rejected() {
        let outcome = validate_url("http://example.com/<script>");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_url_quote_rejected() {
        let outcome = validate_url("http://example.com/\"test");
        assert!(outcome.is_err());
    }
}