1use crate::walk;
15use anyhow::{Context, Result};
16use std::{
17 fs,
18 path::{Path, PathBuf},
19};
20
/// Memory budget, in mebibytes, used by [`MemoryBudget::default_budget`].
pub const DEFAULT_MEMORY_BUDGET_MB: usize = 512;

/// Estimated memory cost of one page (64 KiB); divides a byte budget into a
/// number of pages per batch.
const ESTIMATED_BYTES_PER_PAGE: usize = 64 * 1024;

/// A memory budget expressed both in bytes and as a number of pages per
/// batch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MemoryBudget {
    /// Budget in bytes.
    pub max_bytes: usize,
    /// Number of pages placed in one batch.
    pub batch_size: usize,
}

impl MemoryBudget {
    /// Number of bytes in one mebibyte.
    const BYTES_PER_MB: usize = 1024 * 1024;

    /// Lower bound applied to the batch size computed by
    /// [`MemoryBudget::from_mb`].
    const MIN_BATCH_SIZE: usize = 10;

    /// Builds a budget from a size in mebibytes.
    ///
    /// `batch_size` is `max_bytes / ESTIMATED_BYTES_PER_PAGE`, but never
    /// less than 10. The byte count saturates at `usize::MAX` instead of
    /// overflowing when `mb` is too large to be expressed in bytes.
    #[must_use]
    pub fn from_mb(mb: usize) -> Self {
        // A plain `mb * 1024 * 1024` panics in debug builds and wraps in
        // release builds once the product exceeds `usize::MAX`.
        let max_bytes = mb.saturating_mul(Self::BYTES_PER_MB);
        let batch_size =
            (max_bytes / ESTIMATED_BYTES_PER_PAGE).max(Self::MIN_BATCH_SIZE);
        Self {
            max_bytes,
            batch_size,
        }
    }

    /// Builds the budget corresponding to [`DEFAULT_MEMORY_BUDGET_MB`].
    #[must_use]
    pub fn default_budget() -> Self {
        Self::from_mb(DEFAULT_MEMORY_BUDGET_MB)
    }
}
55
56pub fn batched_content_files(
60 content_dir: &Path,
61 budget: &MemoryBudget,
62) -> Result<Vec<Vec<PathBuf>>> {
63 let all_files = walk::walk_files(content_dir, "md")
64 .with_context(|| format!("cannot walk {}", content_dir.display()))?;
65
66 if all_files.is_empty() {
67 return Ok(vec![]);
68 }
69
70 let batches: Vec<Vec<PathBuf>> = all_files
71 .chunks(budget.batch_size)
72 .map(|chunk| chunk.to_vec())
73 .collect();
74
75 log::info!(
76 "[streaming] {} file(s) in {} batch(es) (budget: {} MB, {} pages/batch)",
77 all_files.len(),
78 batches.len(),
79 budget.max_bytes / (1024 * 1024),
80 budget.batch_size,
81 );
82
83 Ok(batches)
84}
85
86pub fn compile_batch(
92 batch: &[PathBuf],
93 content_dir: &Path,
94 build_dir: &Path,
95 site_dir: &Path,
96 template_dir: &Path,
97 batch_idx: usize,
98) -> Result<()> {
99 if batch.is_empty() {
100 return Ok(());
101 }
102
103 let batch_content = build_dir.join(format!(".batch-{batch_idx}"));
105 fs::create_dir_all(&batch_content)?;
106
107 for file in batch {
109 let rel = file.strip_prefix(content_dir).unwrap_or(file);
110 let dest = batch_content.join(rel);
111 if let Some(parent) = dest.parent() {
112 fs::create_dir_all(parent)?;
113 }
114 let _ = fs::copy(file, &dest)?;
115 }
116
117 let batch_build = build_dir.join(format!(".batch-{batch_idx}-build"));
119 let batch_site = build_dir.join(format!(".batch-{batch_idx}-site"));
120 fs::create_dir_all(&batch_build)?;
121 fs::create_dir_all(&batch_site)?;
122
123 let compile_result = staticdatagen::compile(
124 &batch_build,
125 &batch_content,
126 &batch_site,
127 template_dir,
128 );
129
130 if compile_result.is_ok() {
132 fs::create_dir_all(site_dir)?;
133 merge_dir(&batch_site, site_dir)?;
134 }
135
136 let _ = fs::remove_dir_all(&batch_content);
138 let _ = fs::remove_dir_all(&batch_build);
139 let _ = fs::remove_dir_all(&batch_site);
140
141 compile_result.map_err(|e| anyhow::anyhow!("batch {batch_idx}: {e:?}"))
142}
143
/// Recursively copies the contents of `src` into `dst`.
///
/// A file in `dst` with the same name as one in `src` is overwritten; other
/// entries already in `dst` are left untouched. Sub-directories are created
/// in `dst` as needed, but `dst` itself is not created by this function. A
/// `src` that does not exist is a no-op.
///
/// # Errors
///
/// Returns the first I/O error hit while listing, creating or copying.
fn merge_dir(src: &Path, dst: &Path) -> Result<()> {
    if !src.exists() {
        return Ok(());
    }

    fs::read_dir(src)?.try_for_each(|entry| -> Result<()> {
        let entry = entry?;
        let source_path = entry.path();
        let target_path = dst.join(entry.file_name());

        // Plain files are copied straight over whatever is at the target.
        if !source_path.is_dir() {
            let _ = fs::copy(&source_path, &target_path)?;
            return Ok(());
        }

        // Directories are mirrored, then merged one level down.
        fs::create_dir_all(&target_path)?;
        merge_dir(&source_path, &target_path)
    })
}
164
165#[must_use]
170pub fn should_stream(
171 content_dir: &Path,
172 budget: &MemoryBudget,
173 explicitly_set: bool,
174) -> bool {
175 if explicitly_set {
176 return true;
177 }
178
179 let count = walk::walk_files(content_dir, "md").map_or(0, |f| f.len());
180
181 count > budget.batch_size
182}
183
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    // ── Fixtures ────────────────────────────────────────────────────────

    /// Creates a temporary directory holding an empty `content/` folder.
    ///
    /// The returned guard must be kept alive for as long as the path is
    /// used.
    fn content_fixture() -> (TempDir, PathBuf) {
        let dir = tempdir().unwrap();
        let content = dir.path().join("content");
        fs::create_dir_all(&content).unwrap();
        (dir, content)
    }

    /// Creates a temporary directory holding empty `src/` and `dst/`
    /// folders.
    fn merge_fixture() -> (TempDir, PathBuf, PathBuf) {
        let dir = tempdir().unwrap();
        let src = dir.path().join("src");
        let dst = dir.path().join("dst");
        fs::create_dir_all(&src).unwrap();
        fs::create_dir_all(&dst).unwrap();
        (dir, src, dst)
    }

    /// Writes `count` markdown pages named `p<i>.md` into `content`.
    fn write_pages(content: &Path, count: usize) {
        for i in 0..count {
            fs::write(content.join(format!("p{i}.md")), "# Hi").unwrap();
        }
    }

    /// Total number of paths across all batches.
    fn total_files(batches: &[Vec<PathBuf>]) -> usize {
        batches.iter().map(Vec::len).sum()
    }

    // ── MemoryBudget ────────────────────────────────────────────────────

    #[test]
    fn memory_budget_from_mb() {
        let budget = MemoryBudget::from_mb(256);
        assert_eq!(budget.max_bytes, 256 * 1024 * 1024);
        assert!(budget.batch_size > 0);
    }

    #[test]
    fn memory_budget_default() {
        let budget = MemoryBudget::default_budget();
        assert_eq!(budget.max_bytes, 512 * 1024 * 1024);
    }

    #[test]
    fn memory_budget_minimum_batch_size() {
        let budget = MemoryBudget::from_mb(0);
        assert!(
            budget.batch_size >= 10,
            "batch size should have a floor of 10"
        );
    }

    // ── batched_content_files / merge_dir / should_stream basics ────────

    #[test]
    fn batched_content_files_empty_dir() {
        let (_dir, content) = content_fixture();

        let budget = MemoryBudget::from_mb(512);
        let batches = batched_content_files(&content, &budget).unwrap();
        assert!(batches.is_empty());
    }

    #[test]
    fn batched_content_files_splits_correctly() {
        let (_dir, content) = content_fixture();

        for i in 0..25 {
            fs::write(
                content.join(format!("page{i}.md")),
                format!("# Page {i}"),
            )
            .unwrap();
        }

        let budget = MemoryBudget {
            max_bytes: 0,
            batch_size: 10,
        };
        let batches = batched_content_files(&content, &budget).unwrap();

        // 25 files in batches of 10 -> 10 + 10 + 5.
        assert_eq!(batches.len(), 3);
        assert_eq!(batches[0].len(), 10);
        assert_eq!(batches[1].len(), 10);
        assert_eq!(batches[2].len(), 5);
    }

    #[test]
    fn merge_dir_combines_files() {
        let (_dir, src, dst) = merge_fixture();

        fs::write(src.join("a.html"), "from src").unwrap();
        fs::write(dst.join("b.html"), "existing").unwrap();

        merge_dir(&src, &dst).unwrap();

        assert_eq!(
            fs::read_to_string(dst.join("a.html")).unwrap(),
            "from src"
        );
        assert_eq!(
            fs::read_to_string(dst.join("b.html")).unwrap(),
            "existing"
        );
    }

    #[test]
    fn merge_dir_overwrites_on_conflict() {
        let (_dir, src, dst) = merge_fixture();

        fs::write(src.join("a.html"), "new").unwrap();
        fs::write(dst.join("a.html"), "old").unwrap();

        merge_dir(&src, &dst).unwrap();

        assert_eq!(fs::read_to_string(dst.join("a.html")).unwrap(), "new");
    }

    #[test]
    fn should_stream_when_explicitly_set() {
        let (_dir, content) = content_fixture();

        let budget = MemoryBudget::default_budget();
        assert!(should_stream(&content, &budget, true));
    }

    #[test]
    fn compile_batch_empty_is_noop() {
        let dir = tempdir().unwrap();
        let build = dir.path().join("build");
        let site = dir.path().join("site");

        let result = compile_batch(
            &[],
            dir.path(),
            &build,
            &site,
            &dir.path().join("templates"),
            0,
        );

        assert!(result.is_ok());
        // An empty batch returns before touching the file system.
        assert!(!build.exists());
        assert!(!site.exists());
    }

    #[test]
    fn merge_dir_nonexistent_src_is_noop() {
        let dir = tempdir().unwrap();
        let result = merge_dir(
            &dir.path().join("nonexistent"),
            &dir.path().join("dst"),
        );
        assert!(result.is_ok());
    }

    #[test]
    fn merge_dir_nested() {
        let (_dir, src, dst) = merge_fixture();
        let nested = src.join("sub");
        fs::create_dir_all(&nested).unwrap();
        fs::write(nested.join("file.txt"), "nested").unwrap();

        merge_dir(&src, &dst).unwrap();
        assert_eq!(
            fs::read_to_string(dst.join("sub/file.txt")).unwrap(),
            "nested"
        );
    }

    #[test]
    fn should_stream_large_site() {
        let (_dir, content) = content_fixture();
        let budget = MemoryBudget {
            max_bytes: 0,
            batch_size: 2,
        };
        write_pages(&content, 5);
        assert!(should_stream(&content, &budget, false));
    }

    #[test]
    fn should_not_stream_small_site() {
        let (_dir, content) = content_fixture();
        fs::write(content.join("index.md"), "# Home").unwrap();

        let budget = MemoryBudget::default_budget();
        assert!(!should_stream(&content, &budget, false));
    }

    // ── MemoryBudget: exact values ──────────────────────────────────────

    #[test]
    fn memory_budget_from_mb_one() {
        let budget = MemoryBudget::from_mb(1);
        assert_eq!(budget.max_bytes, 1024 * 1024);
        // 1 MiB / 64 KiB per page.
        assert_eq!(budget.batch_size, 16);
    }

    #[test]
    fn memory_budget_from_mb_very_large() {
        let budget = MemoryBudget::from_mb(4096);
        assert_eq!(budget.max_bytes, 4096 * 1024 * 1024);
        // 4 GiB / 64 KiB per page.
        assert_eq!(budget.batch_size, 65536);
    }

    #[test]
    fn memory_budget_batch_size_floor_is_ten() {
        let budget = MemoryBudget::from_mb(0);
        assert_eq!(budget.max_bytes, 0);
        assert_eq!(budget.batch_size, 10);
    }

    #[test]
    fn memory_budget_default_budget_matches_constant() {
        let budget = MemoryBudget::default_budget();
        assert_eq!(budget.max_bytes, DEFAULT_MEMORY_BUDGET_MB * 1024 * 1024);
        assert_eq!(
            budget.batch_size,
            MemoryBudget::from_mb(DEFAULT_MEMORY_BUDGET_MB).batch_size
        );
    }

    #[test]
    fn memory_budget_clone_copy_debug() {
        let a = MemoryBudget::from_mb(128);
        // `a` stays usable after the assignment because the type is `Copy`.
        let b = a;
        assert_eq!(a.max_bytes, b.max_bytes);
        assert_eq!(a.batch_size, b.batch_size);
        let debug = format!("{a:?}");
        assert!(debug.contains("MemoryBudget"));
    }

    // ── batched_content_files: edge cases ───────────────────────────────

    #[test]
    fn batched_content_files_nonexistent_dir_returns_empty() {
        let dir = tempdir().unwrap();
        let budget = MemoryBudget::from_mb(512);
        let result =
            batched_content_files(&dir.path().join("nonexistent"), &budget);
        // Whether a missing directory is an error or "no files" is decided
        // by `walk::walk_files`; only the success case has anything to
        // check.
        if let Ok(batches) = result {
            assert!(batches.is_empty());
        }
    }

    #[test]
    fn batched_content_files_single_file() {
        let (_dir, content) = content_fixture();
        fs::write(content.join("index.md"), "# Home").unwrap();

        let budget = MemoryBudget::from_mb(512);
        let batches = batched_content_files(&content, &budget).unwrap();
        assert_eq!(batches.len(), 1);
        assert_eq!(batches[0].len(), 1);
    }

    #[test]
    fn batched_content_files_ignores_non_md() {
        let (_dir, content) = content_fixture();
        fs::write(content.join("page.md"), "# Page").unwrap();
        fs::write(content.join("image.png"), "fakepng").unwrap();
        fs::write(content.join("style.css"), "body{}").unwrap();

        let budget = MemoryBudget::from_mb(512);
        let batches = batched_content_files(&content, &budget).unwrap();
        assert_eq!(
            total_files(&batches),
            1,
            "only .md files should be collected"
        );
    }

    #[test]
    fn batched_content_files_exact_batch_boundary() {
        let (_dir, content) = content_fixture();
        write_pages(&content, 10);

        let budget = MemoryBudget {
            max_bytes: 0,
            batch_size: 10,
        };
        let batches = batched_content_files(&content, &budget).unwrap();
        assert_eq!(batches.len(), 1);
        assert_eq!(batches[0].len(), 10);
    }

    #[test]
    fn batched_content_files_many_small_batches() {
        let (_dir, content) = content_fixture();
        write_pages(&content, 7);

        let budget = MemoryBudget {
            max_bytes: 0,
            batch_size: 2,
        };
        let batches = batched_content_files(&content, &budget).unwrap();
        // 7 files in batches of 2 -> 2 + 2 + 2 + 1.
        assert_eq!(batches.len(), 4);
        assert_eq!(batches[3].len(), 1);
    }

    #[test]
    fn batched_content_files_nested_directories() {
        let (_dir, content) = content_fixture();
        fs::create_dir_all(content.join("blog")).unwrap();
        fs::create_dir_all(content.join("docs")).unwrap();
        fs::write(content.join("index.md"), "# Index").unwrap();
        fs::write(content.join("blog/post.md"), "# Post").unwrap();
        fs::write(content.join("docs/api.md"), "# API").unwrap();

        let budget = MemoryBudget::from_mb(512);
        let batches = batched_content_files(&content, &budget).unwrap();
        assert_eq!(total_files(&batches), 3);
    }

    // ── merge_dir: edge cases ───────────────────────────────────────────

    #[test]
    fn merge_dir_deeply_nested() {
        let (_dir, src, dst) = merge_fixture();
        fs::create_dir_all(src.join("a/b/c")).unwrap();
        fs::write(src.join("a/b/c/deep.txt"), "deep content").unwrap();

        merge_dir(&src, &dst).unwrap();
        assert_eq!(
            fs::read_to_string(dst.join("a/b/c/deep.txt")).unwrap(),
            "deep content"
        );
    }

    #[test]
    fn merge_dir_empty_src() {
        let (_dir, src, dst) = merge_fixture();
        fs::write(dst.join("existing.txt"), "keep").unwrap();

        merge_dir(&src, &dst).unwrap();
        assert_eq!(
            fs::read_to_string(dst.join("existing.txt")).unwrap(),
            "keep"
        );
    }

    #[test]
    fn merge_dir_multiple_files() {
        let (_dir, src, dst) = merge_fixture();
        for i in 0..5 {
            fs::write(src.join(format!("f{i}.txt")), format!("data{i}"))
                .unwrap();
        }

        merge_dir(&src, &dst).unwrap();
        for i in 0..5 {
            assert_eq!(
                fs::read_to_string(dst.join(format!("f{i}.txt"))).unwrap(),
                format!("data{i}")
            );
        }
    }

    // ── should_stream: edge cases ───────────────────────────────────────

    #[test]
    fn should_stream_with_no_content_dir() {
        let dir = tempdir().unwrap();
        let budget = MemoryBudget::from_mb(512);
        assert!(!should_stream(
            &dir.path().join("no-content"),
            &budget,
            false
        ));
    }

    #[test]
    fn should_stream_explicitly_set_overrides_count() {
        // The content directory is empty, so only the flag can make this
        // true.
        let (_dir, content) = content_fixture();

        let budget = MemoryBudget::from_mb(512);
        assert!(should_stream(&content, &budget, true));
    }

    #[test]
    fn should_stream_exactly_at_batch_boundary() {
        let (_dir, content) = content_fixture();
        let budget = MemoryBudget {
            max_bytes: 0,
            batch_size: 3,
        };
        write_pages(&content, 3);
        // The comparison is strict: a site that fits in one batch is not
        // streamed.
        assert!(!should_stream(&content, &budget, false));
    }

    #[test]
    fn should_stream_one_over_boundary() {
        let (_dir, content) = content_fixture();
        let budget = MemoryBudget {
            max_bytes: 0,
            batch_size: 3,
        };
        write_pages(&content, 4);
        assert!(should_stream(&content, &budget, false));
    }

    // ── compile_batch ───────────────────────────────────────────────────

    #[test]
    fn compile_batch_with_nonexistent_files_still_creates_dirs() {
        let (dir, content) = content_fixture();
        let build = dir.path().join("build");
        let site = dir.path().join("site");
        let templates = dir.path().join("templates");

        let result = compile_batch(
            &[content.join("nonexistent.md")],
            &content,
            &build,
            &site,
            &templates,
            0,
        );

        // Copying a source file that does not exist must be reported.
        assert!(result.is_err());
        // The build directory is created while staging, before the copy
        // fails.
        assert!(build.exists());
    }

    #[test]
    fn compile_batch_creates_batch_content_dir() {
        let (dir, content) = content_fixture();
        let build = dir.path().join("build");
        let site = dir.path().join("site");
        let templates = dir.path().join("templates");
        fs::create_dir_all(&templates).unwrap();
        fs::write(content.join("page.md"), "---\ntitle: T\n---\n# Hi").unwrap();

        // Smoke test only: whether compilation succeeds with an empty
        // template directory is up to `staticdatagen`, so the result is not
        // asserted.
        let _result = compile_batch(
            &[content.join("page.md")],
            &content,
            &build,
            &site,
            &templates,
            42,
        );
    }
}