Skip to main content

ssg/
fs_ops.rs

1// Copyright © 2023 - 2026 Static Site Generator (SSG). All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! File system operations: directory copying, safety validation, and traversal.
5
6use std::fs;
7use std::path::{Path, PathBuf};
8
9use anyhow::{ensure, Context, Result};
10use rayon::prelude::*;
11
12use crate::MAX_DIR_DEPTH;
13
/// Minimum number of entries to justify Rayon parallel dispatch overhead.
///
/// `copy_files_maybe_parallel` compares the number of file entries in a
/// directory against this value: at or above it the files are copied through
/// `par_iter`, below it they are copied with a plain sequential iterator.
pub(crate) const PARALLEL_THRESHOLD: usize = 16;
16
17/// Validates and copies files from source to destination.
18///
19/// This function performs comprehensive safety checks before copying files,
20/// including path validation, symlink detection, and size limitations.
21///
22/// # Arguments
23///
24/// * `src` - Source path to copy from
25/// * `dst` - Destination path to copy to
26///
27/// # Returns
28///
29/// Returns `Ok(())` if the copy operation succeeds, or an error if:
30/// * Source path is invalid or inaccessible
31/// * Source contains symlinks (not allowed)
32/// * Files exceed size limits (default: 10MB)
33/// * Destination cannot be created or written to
34///
35/// # Example
36///
37/// ```rust,no_run
38/// use std::path::Path;
39/// use ssg::verify_and_copy_files;
40///
41/// fn main() -> anyhow::Result<()> {
42///     let source = Path::new("source_directory");
43///     let destination = Path::new("destination_directory");
44///
45///     verify_and_copy_files(source, destination)?;
46///     println!("Files copied successfully");
47///     Ok(())
48/// }
49/// ```
50///
51/// # Security
52///
53/// This function implements several security measures:
54/// * Path traversal prevention
55/// * Symlink restriction
56/// * File size limits
57/// * Permission validation
58pub fn verify_and_copy_files(src: &Path, dst: &Path) -> Result<()> {
59    ensure!(
60        is_safe_path(src)?,
61        "Source directory is unsafe or inaccessible: {}",
62        src.display()
63    );
64
65    if !src.exists() {
66        anyhow::bail!("Source directory does not exist: {}", src.display());
67    }
68
69    // If source is a file, verify its safety
70    if src.is_file() {
71        verify_file_safety(src)?;
72    }
73
74    // Ensure the destination directory exists
75    fs::create_dir_all(dst).with_context(|| {
76        format!(
77            "Failed to create or access destination directory at path: {}",
78            dst.display()
79        )
80    })?;
81
82    // Copy directory contents with safety checks
83    copy_dir_all(src, dst).with_context(|| {
84        format!(
85            "Failed to copy files from source: {} to destination: {}",
86            src.display(),
87            dst.display()
88        )
89    })?;
90
91    Ok(())
92}
93
94/// Asynchronously validates and copies files between directories.
95///
96/// Uses iterative traversal with an explicit stack to avoid unbounded recursion.
97/// Traversal depth is bounded by [`MAX_DIR_DEPTH`].
98pub fn verify_and_copy_files_async(src: &Path, dst: &Path) -> Result<()> {
99    if !src.exists() {
100        return Err(anyhow::anyhow!(
101            "Source directory does not exist: {}",
102            src.display()
103        ));
104    }
105
106    fs::create_dir_all(dst).with_context(|| {
107        format!(
108            "Failed to create or access destination directory at path: {}",
109            dst.display()
110        )
111    })?;
112
113    copy_directory_recursive(src, dst)
114}
115
116/// Iteratively copies a directory tree with depth bounds and safety checks.
117fn copy_directory_recursive(src: &Path, dst: &Path) -> Result<()> {
118    let mut stack = vec![(src.to_path_buf(), dst.to_path_buf(), 0usize)];
119
120    while let Some((src_dir, dst_dir, depth)) = stack.pop() {
121        ensure!(
122            depth < MAX_DIR_DEPTH,
123            "Directory nesting exceeds maximum depth of {}: {}",
124            MAX_DIR_DEPTH,
125            src_dir.display()
126        );
127
128        for entry in fs::read_dir(&src_dir)? {
129            let entry = entry?;
130            copy_entry(&entry, &dst_dir, depth, &mut stack)?;
131        }
132    }
133
134    Ok(())
135}
136
137/// Copies a single directory entry, pushing subdirs onto the stack.
138fn copy_entry(
139    entry: &fs::DirEntry,
140    dst_dir: &Path,
141    depth: usize,
142    stack: &mut Vec<(PathBuf, PathBuf, usize)>,
143) -> Result<()> {
144    let src_path = entry.path();
145    let dst_path = dst_dir.join(entry.file_name());
146
147    if src_path.is_dir() {
148        fs::create_dir_all(&dst_path)?;
149        stack.push((src_path, dst_path, depth + 1));
150    } else {
151        verify_file_safety(&src_path)?;
152        _ = fs::copy(&src_path, &dst_path)?;
153    }
154    Ok(())
155}
156
157/// Copies directories with a progress bar for feedback.
158///
159/// Uses iterative traversal with an explicit stack to avoid unbounded recursion.
160/// Traversal depth is bounded by [`MAX_DIR_DEPTH`].
161pub fn copy_dir_with_progress(src: &Path, dst: &Path) -> Result<()> {
162    if !src.exists() {
163        anyhow::bail!("Source directory does not exist: {}", src.display());
164    }
165
166    fs::create_dir_all(dst).with_context(|| {
167        format!("Failed to create destination directory: {}", dst.display())
168    })?;
169
170    let mut file_count: u64 = 0;
171
172    // (source_dir, dest_dir, depth)
173    let mut stack = vec![(src.to_path_buf(), dst.to_path_buf(), 0usize)];
174
175    while let Some((src_dir, dst_dir, depth)) = stack.pop() {
176        ensure!(
177            depth < MAX_DIR_DEPTH,
178            "Directory nesting exceeds maximum depth of {}: {}",
179            MAX_DIR_DEPTH,
180            src_dir.display()
181        );
182
183        let entries: Vec<_> = fs::read_dir(&src_dir)
184            .context(format!(
185                "Failed to read source directory: {}",
186                src_dir.display()
187            ))?
188            .collect::<std::io::Result<Vec<_>>>()?;
189
190        for entry in &entries {
191            let src_path = entry.path();
192            let dst_path = dst_dir.join(entry.file_name());
193
194            if src_path.is_dir() {
195                fs::create_dir_all(&dst_path)?;
196                stack.push((src_path, dst_path, depth + 1));
197            } else {
198                let _ = fs::copy(&src_path, &dst_path)?;
199            }
200            file_count += 1;
201        }
202    }
203
204    eprintln!("Copied {file_count} files");
205    Ok(())
206}
207
208/// Checks if a given path is safe to use.
209///
210/// Validates that the provided path does not contain directory traversal attempts
211/// or other potential security risks.
212///
213/// # Arguments
214///
215/// * `path` - The path to validate
216///
217/// # Returns
218///
219/// * `Ok(true)` - If the path is safe to use
220/// * `Ok(false)` - If the path contains unsafe elements
221/// * `Err` - If path validation fails
222///
223/// # Security
224///
225/// This function prevents directory traversal attacks by:
226/// * Resolving symbolic links
227/// * Checking for parent directory references (`..`)
228/// * Validating path components
229///
230pub fn is_safe_path(path: &Path) -> Result<bool> {
231    // Check for traversal patterns in non-existent paths
232    if !path.exists() {
233        let path_str = path.to_string_lossy();
234        if path_str.contains("..") {
235            return Ok(false);
236        }
237        return Ok(true); // Non-existent paths without traversal are safe
238    }
239
240    // canonicalize() resolves symlinks and all `..' components,
241    // so the resulting path is always absolute with no parent refs.
242    // A failure here (e.g. broken symlink) means the path is unsafe.
243    let _canonical = path
244        .canonicalize()
245        .context(format!("Failed to canonicalize path {}", path.display()))?;
246
247    Ok(true)
248}
249
250/// Verifies the safety of a file for processing.
251///
252/// Performs comprehensive safety checks on a file to ensure it meets security
253/// requirements before processing. These checks include symlink detection and
254/// file size validation.
255///
256/// # Arguments
257///
258/// * `path` - Reference to the path of the file to verify
259///
260/// # Returns
261///
262/// * `Ok(())` - If the file passes all safety checks
263/// * `Err` - If any safety check fails
264///
265/// # Safety Checks
266///
267/// * Symlinks: Not allowed (returns error)
268/// * File size: Must be under 10MB
269/// * File type: Must be a regular file
270///
271/// # Examples
272///
273/// Verifies the safety of a file.
274///
275/// ```rust
276/// use std::fs;
277/// use std::path::Path;
278/// use ssg::verify_file_safety;
279/// use tempfile::tempdir;
280///
281/// # fn main() -> anyhow::Result<()> {
282/// // Create temporary directory
283/// let temp_dir = tempdir()?;
284/// let file_path = temp_dir.path().join("index.md");
285///
286/// // Create test file
287/// fs::write(&file_path, "Hello, world!")?;
288///
289/// // Perform verification
290/// verify_file_safety(&file_path)?;
291///
292/// // Directory and file are automatically cleaned up
293/// # Ok(())
294/// # }
295/// ```
296///
297/// # Errors
298///
299/// Returns an error if:
300/// * File is a symlink
301/// * File size exceeds 10MB
302/// * Cannot read file metadata
303pub fn verify_file_safety(path: &Path) -> Result<()> {
304    const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024; // 10MB limit
305
306    // Get symlink metadata without following the symlink
307    let symlink_metadata = path.symlink_metadata().map_err(|e| {
308        anyhow::anyhow!(
309            "Failed to get symlink metadata for {}: {}",
310            path.display(),
311            e
312        )
313    })?;
314
315    // Explicitly check for symlinks first
316    if symlink_metadata.file_type().is_symlink() {
317        return Err(anyhow::anyhow!(
318            "Symlinks are not allowed: {}",
319            path.display()
320        ));
321    }
322
323    // Only check size if it's a regular file
324    if symlink_metadata.file_type().is_file()
325        && symlink_metadata.len() > MAX_FILE_SIZE
326    {
327        return Err(anyhow::anyhow!(
328            "File exceeds maximum allowed size of {} bytes: {}",
329            MAX_FILE_SIZE,
330            path.display()
331        ));
332    }
333
334    Ok(())
335}
336
337/// Recursively collects all file paths within a directory.
338///
339/// Traverses a directory tree and compiles a list of all file paths found,
340/// excluding directories themselves.
341///
342/// # Arguments
343///
344/// * `dir` - Reference to the directory to search
345/// * `files` - Mutable vector to store found file paths
346///
347/// # Returns
348///
349/// * `Ok(())` - If the collection process succeeds
350/// * `Err` - If any file system operation fails
351///
352/// # Examples
353///
354/// ```rust
355/// use std::path::{Path, PathBuf};
356/// use ssg::collect_files_recursive;
357///
358/// fn main() -> anyhow::Result<()> {
359///     let mut files = Vec::new();
360///     let dir_path = Path::new("./examples/content");
361///
362///     collect_files_recursive(dir_path, &mut files)?;
363///
364///     for file in files {
365///         println!("Found file: {}", file.display());
366///     }
367///
368///     Ok(())
369/// }
370/// ```
371///
372/// # Note
373///
374/// This function:
375/// * Only collects file paths, not directory paths
376/// * Rejects symbolic links (consistent with security model)
377/// * Maintains original path structure
378pub fn collect_files_recursive(
379    dir: &Path,
380    files: &mut Vec<PathBuf>,
381) -> Result<()> {
382    // (directory, depth)
383    let mut stack = vec![(dir.to_path_buf(), 0usize)];
384
385    while let Some((current_dir, depth)) = stack.pop() {
386        ensure!(
387            depth < MAX_DIR_DEPTH,
388            "Directory nesting exceeds maximum depth of {}: {}",
389            MAX_DIR_DEPTH,
390            current_dir.display()
391        );
392
393        for entry in fs::read_dir(&current_dir)? {
394            let path = entry?.path();
395
396            if path.is_dir() {
397                stack.push((path, depth + 1));
398            } else {
399                files.push(path);
400            }
401        }
402    }
403    Ok(())
404}
405
406/// Recursively copies a directory whilst maintaining structure and attributes.
407///
408/// Performs a deep copy of a directory tree, preserving file attributes and
409/// handling nested directories. Uses parallel processing for improved performance.
410///
411/// # Arguments
412///
413/// * `src` - Source directory path
414/// * `dst` - Destination directory path
415///
416/// # Returns
417///
418/// * `Ok(())` - If the copy operation succeeds
419/// * `Err` - If any part of the copy operation fails
420///
421/// # Performance
422///
423/// Uses rayon for parallel processing of files, significantly improving
424/// performance for directories with many files.
425///
426/// # Safety
427///
428/// * Verifies file safety before copying
429/// * Maintains original file permissions
430/// * Handles circular references
431pub fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
432    fs::create_dir_all(dst)?;
433
434    // (source_dir, dest_dir, depth)
435    let mut stack = vec![(src.to_path_buf(), dst.to_path_buf(), 0usize)];
436
437    while let Some((src_dir, dst_dir, depth)) = stack.pop() {
438        ensure!(
439            depth < MAX_DIR_DEPTH,
440            "Directory nesting exceeds maximum depth of {}: {}",
441            MAX_DIR_DEPTH,
442            src_dir.display()
443        );
444
445        let entries: Vec<_> =
446            fs::read_dir(&src_dir)?.collect::<std::io::Result<Vec<_>>>()?;
447
448        let (files, subdirs) = partition_entries(&entries, &dst_dir);
449
450        copy_files_maybe_parallel(&files, &dst_dir)?;
451
452        for (sub_src, sub_dst) in subdirs {
453            fs::create_dir_all(&sub_dst)?;
454            stack.push((sub_src, sub_dst, depth + 1));
455        }
456    }
457
458    Ok(())
459}
460
/// Separates directory entries into files and subdirectories.
///
/// Returns `(files, subdirs)`, where `files` borrows the entries that are not
/// real directories and `subdirs` holds `(source_path, destination_path)`
/// pairs, the destination being `dst_dir` joined with the entry's file name.
///
/// Classification uses `DirEntry::file_type`, which does not follow symlinks,
/// so a symlink pointing at a directory is returned among `files` rather than
/// being scheduled for traversal. An entry whose type cannot be read is also
/// returned among `files`, leaving the caller's per-file handling to report
/// the problem.
fn partition_entries<'a>(
    entries: &'a [fs::DirEntry],
    dst_dir: &Path,
) -> (Vec<&'a fs::DirEntry>, Vec<(PathBuf, PathBuf)>) {
    let mut files = Vec::new();
    let mut subdirs = Vec::new();

    for entry in entries {
        // `Path::is_dir` would resolve symlinks; `file_type` inspects the
        // entry itself.
        let is_real_dir = entry
            .file_type()
            .map(|kind| kind.is_dir())
            .unwrap_or(false);

        if is_real_dir {
            subdirs.push((entry.path(), dst_dir.join(entry.file_name())));
        } else {
            files.push(entry);
        }
    }

    (files, subdirs)
}
481
482/// Copies file entries, using parallel dispatch when the count justifies it.
483fn copy_files_maybe_parallel(
484    files: &[&fs::DirEntry],
485    dst_dir: &Path,
486) -> Result<()> {
487    let copy_file = |entry: &&fs::DirEntry| -> Result<()> {
488        let src_path = entry.path();
489        let dst_path = dst_dir.join(entry.file_name());
490        verify_file_safety(&src_path)?;
491        _ = fs::copy(&src_path, &dst_path)?;
492        Ok(())
493    };
494
495    if files.len() >= PARALLEL_THRESHOLD {
496        files.par_iter().try_for_each(copy_file)?;
497    } else {
498        files.iter().try_for_each(copy_file)?;
499    }
500    Ok(())
501}
502
503/// Asynchronously copies an entire directory structure, preserving file attributes and handling nested directories.
504///
505/// # Parameters
506///
507/// * `src`: A reference to the source directory path.
508/// * `dst`: A reference to the destination directory path.
509///
510/// # Returns
511///
512/// * `Result<()>`:
513///   - `Ok(())`: If the directory copying is successful.
514///   - `Err(e)`: If an error occurs during the directory copying, where `e` is the associated error.
515///
516/// # Errors
517///
518/// This function can return the following errors:
519///
520/// * `std::io::Error`: If an error occurs during directory creation, file copying, or permission issues.
521/// * `anyhow::Error`: If a file safety check fails.
522pub fn copy_dir_all_async(src: &Path, dst: &Path) -> Result<()> {
523    internal_copy_dir_async(src, dst)
524}
525
526fn internal_copy_dir_async(src: &Path, dst: &Path) -> Result<()> {
527    fs::create_dir_all(dst)?;
528
529    // (source_dir, dest_dir, depth)
530    let mut stack = vec![(src.to_path_buf(), dst.to_path_buf(), 0usize)];
531
532    while let Some((src_path, dst_path, depth)) = stack.pop() {
533        ensure!(
534            depth < MAX_DIR_DEPTH,
535            "Directory nesting exceeds maximum depth of {}: {}",
536            MAX_DIR_DEPTH,
537            src_path.display()
538        );
539
540        for entry in fs::read_dir(&src_path)? {
541            let entry = entry?;
542            let src_entry = entry.path();
543            let dst_entry = dst_path.join(entry.file_name());
544
545            if src_entry.is_dir() {
546                fs::create_dir_all(&dst_entry)?;
547                stack.push((src_entry, dst_entry, depth + 1));
548            } else {
549                verify_file_safety(&src_entry)?;
550                _ = fs::copy(&src_entry, &dst_entry)?;
551            }
552        }
553    }
554
555    Ok(())
556}
557
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    /// Creates a fresh temporary directory for one test.
    fn scratch() -> TempDir {
        tempdir().unwrap()
    }

    /// Writes `contents` to `dir/name` and returns the resulting path.
    fn put(dir: &Path, name: &str, contents: &str) -> PathBuf {
        let target = dir.join(name);
        fs::write(&target, contents).unwrap();
        target
    }

    /// Reads a file back as a string.
    fn slurp(path: &Path) -> String {
        fs::read_to_string(path).unwrap()
    }

    /// Runs `collect_files_recursive` on `dir` and returns what it found.
    fn gather(dir: &Path) -> Vec<PathBuf> {
        let mut found = Vec::new();
        collect_files_recursive(dir, &mut found).unwrap();
        found
    }

    #[test]
    fn copy_dir_all_copies_files() {
        let (src, dst) = (scratch(), scratch());
        put(src.path(), "a.txt", "hello");
        put(src.path(), "b.txt", "world");

        copy_dir_all(src.path(), dst.path()).unwrap();

        assert_eq!(slurp(&dst.path().join("a.txt")), "hello");
        assert_eq!(slurp(&dst.path().join("b.txt")), "world");
    }

    #[test]
    fn copy_dir_all_nested_preserves_structure() {
        let (src, dst) = (scratch(), scratch());
        let deep = src.path().join("sub").join("deep");
        fs::create_dir_all(&deep).unwrap();
        put(&deep, "file.txt", "nested content");
        put(src.path(), "root.txt", "root");

        copy_dir_all(src.path(), dst.path()).unwrap();

        assert_eq!(
            slurp(&dst.path().join("sub/deep/file.txt")),
            "nested content"
        );
        assert_eq!(slurp(&dst.path().join("root.txt")), "root");
    }

    #[test]
    fn copy_dir_all_nonexistent_src_returns_error() {
        let dst = scratch();
        let missing_src = dst.path().join("does_not_exist");

        assert!(copy_dir_all(&missing_src, dst.path()).is_err());
    }

    #[test]
    fn is_safe_path_normal_relative() {
        let tmp = scratch();
        let file = put(tmp.path(), "safe.txt", "ok");

        assert!(is_safe_path(&file).unwrap());
    }

    #[test]
    fn is_safe_path_with_dotdot_nonexistent() {
        let traversal = Path::new("some/../../../etc/passwd");

        assert!(!is_safe_path(traversal).unwrap());
    }

    #[test]
    fn is_safe_path_with_dotdot_existing() {
        let tmp = scratch();
        // An existing directory followed by `..` still canonicalises cleanly.
        let inner = tmp.path().join("a");
        fs::create_dir_all(&inner).unwrap();

        // canonicalize succeeds → safe
        assert!(is_safe_path(&inner.join("..")).unwrap());
    }

    #[test]
    fn is_safe_path_absolute_existing() {
        let tmp = scratch();
        let file = put(tmp.path(), "abs.txt", "data");

        // Absolute path that exists is safe
        assert!(is_safe_path(&file).unwrap());
    }

    #[test]
    fn verify_file_safety_valid_file() {
        let tmp = scratch();
        let file = put(tmp.path(), "ok.txt", "small file");

        assert!(verify_file_safety(&file).is_ok());
    }

    #[test]
    fn verify_file_safety_nonexistent() {
        let tmp = scratch();
        let absent = tmp.path().join("nope.txt");

        // symlink_metadata fails on nonexistent file → Err
        assert!(verify_file_safety(&absent).is_err());
    }

    #[test]
    fn verify_file_safety_directory() {
        let tmp = scratch();

        // Directories are not files but should not error (size check skipped)
        assert!(verify_file_safety(tmp.path()).is_ok());
    }

    #[test]
    fn collect_files_recursive_finds_all() {
        let tmp = scratch();
        let sub = tmp.path().join("sub");
        fs::create_dir_all(&sub).unwrap();
        put(tmp.path(), "a.md", "");
        put(&sub, "b.md", "");
        put(&sub, "c.txt", "");

        assert_eq!(gather(tmp.path()).len(), 3);
    }

    #[test]
    fn collect_files_recursive_empty_dir() {
        let tmp = scratch();

        assert!(gather(tmp.path()).is_empty());
    }

    #[test]
    fn collect_files_recursive_only_files_not_dirs() {
        let tmp = scratch();
        let sub = tmp.path().join("subdir");
        fs::create_dir_all(&sub).unwrap();
        put(&sub, "only.txt", "data");

        let found = gather(tmp.path());

        assert_eq!(found.len(), 1);
        assert!(found[0].ends_with("only.txt"));
    }

    #[test]
    fn verify_and_copy_files_end_to_end() {
        let (src, dst) = (scratch(), scratch());
        let output = dst.path().join("output");
        put(src.path(), "page.html", "<h1>Hi</h1>");

        verify_and_copy_files(src.path(), &output).unwrap();

        assert_eq!(slurp(&output.join("page.html")), "<h1>Hi</h1>");
    }

    #[test]
    fn copy_dir_with_progress_smoke() {
        let (src, dst) = (scratch(), scratch());
        put(src.path(), "f.txt", "data");

        // Should not panic
        copy_dir_with_progress(src.path(), &dst.path().join("out")).unwrap();
    }

    #[test]
    fn copy_dir_with_progress_nonexistent_src() {
        let tmp = scratch();
        let missing = tmp.path().join("missing");

        assert!(copy_dir_with_progress(&missing, tmp.path()).is_err());
    }
}