Skip to content

Commit 34c1609

Browse files
committed
Fix unused tool versions not removed in prek cache gc (#1436)
1 parent c34c41e commit 34c1609

3 files changed

Lines changed: 97 additions & 10 deletions

File tree

crates/prek/src/cli/cache_gc.rs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use std::ops::AddAssign;
44
use std::path::Path;
55

66
use anyhow::Result;
7+
use clap::ValueEnum;
78
use owo_colors::OwoColorize;
89
use rustc_hash::FxHashMap;
910
use rustc_hash::FxHashSet;
@@ -252,7 +253,6 @@ pub(crate) async fn cache_gc(
252253
)?;
253254

254255
// Sweep tools/<bucket>/<version>
255-
// Only do this when we can positively identify used versions (otherwise be conservative).
256256
let removed_tool_versions = sweep_tool_versions(store, &used_tool_versions, dry_run, verbose)?;
257257

258258
let mut removed_tools = removed_tool_buckets;
@@ -437,13 +437,9 @@ fn sweep_tool_versions(
437437
) -> Result<Removal> {
438438
let mut total = Removal::new(RemovalKind::Tools);
439439

440-
for (bucket, keep_versions) in used_tool_versions {
441-
// If we don't have any positively-identified versions, be conservative.
442-
if keep_versions.is_empty() {
443-
continue;
444-
}
445-
440+
for bucket in ToolBucket::value_variants() {
446441
let bucket_root = store.tools_path(*bucket);
442+
let keep_versions = used_tool_versions.get(bucket);
447443
let removed =
448444
sweep_tool_bucket_versions(*bucket, &bucket_root, keep_versions, dry_run, verbose)?;
449445
total += removed;
@@ -455,7 +451,7 @@ fn sweep_tool_versions(
455451
fn sweep_tool_bucket_versions(
456452
bucket: ToolBucket,
457453
bucket_root: &Path,
458-
keep_versions: &FxHashSet<String>,
454+
keep_versions: Option<&FxHashSet<String>>,
459455
dry_run: bool,
460456
collect_names: bool,
461457
) -> Result<Removal> {
@@ -486,7 +482,11 @@ fn sweep_tool_bucket_versions(
486482
let Some(version_name) = path.file_name().and_then(|n| n.to_str()) else {
487483
continue;
488484
};
489-
if keep_versions.contains(version_name) {
485+
// Skip hidden/system dirs.
486+
if version_name.starts_with('.') {
487+
continue;
488+
}
489+
if keep_versions.is_some_and(|keep| keep.contains(version_name)) {
490490
continue;
491491
}
492492

@@ -554,6 +554,10 @@ fn sweep_dir_by_name(
554554
let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
555555
continue;
556556
};
557+
// Skip hidden/system dirs.
558+
if name.starts_with('.') {
559+
continue;
560+
}
557561
if keep_names.contains(name) {
558562
continue;
559563
}

crates/prek/src/store.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ impl Store {
301301
}
302302
}
303303

304-
#[derive(Copy, Clone, Eq, Hash, PartialEq)]
304+
#[derive(Copy, Clone, Eq, Hash, PartialEq, clap::ValueEnum)]
305305
pub(crate) enum ToolBucket {
306306
Uv,
307307
Python,

crates/prek/tests/cache.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,89 @@ fn cache_gc_prunes_unused_tool_versions() -> anyhow::Result<()> {
399399
Ok(())
400400
}
401401

402+
#[test]
403+
fn cache_gc_prunes_tool_versions_without_positive_identification() -> anyhow::Result<()> {
404+
let context = TestContext::new();
405+
406+
context.write_pre_commit_config(indoc::indoc! {r#"
407+
repos:
408+
- repo: local
409+
hooks:
410+
- id: local-python
411+
name: Local Python Hook
412+
entry: "python -c \"print(1)\""
413+
language: python
414+
"#});
415+
416+
let home = context.home_dir();
417+
418+
// Track the config so GC has something to mark from.
419+
let config_path = context.work_dir().child(CONFIG_FILE);
420+
write_config_tracking_file(home, &[config_path.path()])?;
421+
422+
// Seed a matching installed hook env marker, but use a toolchain path that is *not* inside
423+
// PREK_HOME/tools. This means we cannot positively identify a used tool version, so all
424+
// tool versions under the bucket are unused and should be pruned.
425+
let env_py = home.child("hooks/python-keep");
426+
env_py.create_dir_all()?;
427+
let marker_py = json!({
428+
"language": "python",
429+
"language_version": "3.12.0",
430+
"dependencies": [],
431+
"env_path": env_py.path(),
432+
"toolchain": "/usr/bin/python3",
433+
"extra": {},
434+
});
435+
env_py
436+
.child(".prek-hook.json")
437+
.write_str(&serde_json::to_string_pretty(&marker_py)?)?;
438+
439+
// Seed tool versions that should be removed.
440+
let py_312 = home.child("tools/python/3.12.0");
441+
let py_311 = home.child("tools/python/3.11.0");
442+
py_312.create_dir_all()?;
443+
py_311.create_dir_all()?;
444+
445+
// Add a temp dir to ensure it is not removed.
446+
home.child("repos/.temp").create_dir_all()?;
447+
home.child("tools/.temp").create_dir_all()?;
448+
449+
cmd_snapshot!(
450+
context.filters(),
451+
context.command().args(["cache", "gc", "--dry-run", "-v"]),
452+
@r"
453+
success: true
454+
exit_code: 0
455+
----- stdout -----
456+
Would remove 2 tools ([SIZE])
457+
458+
Would remove 2 tools:
459+
- python/3.11.0
460+
path: [HOME]/tools/python/3.11.0
461+
- python/3.12.0
462+
path: [HOME]/tools/python/3.12.0
463+
464+
----- stderr -----
465+
"
466+
);
467+
468+
cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r"
469+
success: true
470+
exit_code: 0
471+
----- stdout -----
472+
Removed 2 tools ([SIZE])
473+
474+
----- stderr -----
475+
");
476+
477+
py_312.assert(predicates::path::missing());
478+
py_311.assert(predicates::path::missing());
479+
home.child("tools/python")
480+
.assert(predicates::path::is_dir());
481+
482+
Ok(())
483+
}
484+
402485
#[test]
403486
fn cache_gc_keeps_local_hook_env() -> anyhow::Result<()> {
404487
let context = TestContext::new();

0 commit comments

Comments
 (0)