Skip to content

Commit 0a00171

Browse files
CopilotarraykaCopilot
authored
VirtualStorageProvider: Make new() private, add new_physical (#764)
## Completed Changes - [x] Made `VirtualStorageProvider::new()` fully private (not just doc-hidden) - [x] Updated all unit tests in virtual_storage_provider.rs to use `new_memory()` - [x] Updated all usages across diskann-providers, diskann-disk, and diskann-tools to use specific constructors - [x] Added `VirtualStorageProvider::new_physical` to clippy.toml disallowed-methods list - [x] Removed all unused imports (MemoryFS, OverlayFS) - [x] Made test helper functions generic over filesystem type where needed - [x] **Fixed doc tests:** - Added missing `use std::io::Write;` import to doc examples - Removed doc examples that write to physical filesystem (new_physical and new_overlay) - Simplified to only show `new_memory()` usage - Removed unnecessary turbofish notation in test code - [x] Verified all checks pass: - ✅ cargo fmt --all --check - ✅ cargo clippy --locked --workspace --all-targets --no-deps - ✅ cargo test --doc --workspace - ✅ All unit tests pass All PR review feedback has been addressed. <!-- START COPILOT ORIGINAL PROMPT --> <details> <summary>Original prompt</summary> > > ---- > > *This section details on the original issue you should resolve* > > <issue_title>VirtualStorageProvider: encourage use of `new_overlay` or `new_memory`</issue_title> > <issue_description>Make `VirtualStorageProvider::new` private. > Also add VirtualStorageProvider<PhysicalFS>::new_physical<P: AsRef<std::path::Path>>(path: P). > Expose only new_overlay(), new_memory(), new_physical(). > > Examples like these should be replaced with VirtualStorageProvider::new_memory(): > let fs = OverlayFS::new(&[MemoryFS::default().into()]); > let storage_provider = VirtualStorageProvider::new(fs); > > Original post: > > Do you see any value in making `VirtualStorageProvider::new` much harder to call to encourage use of `new_overlay` or `new_memory`? Maybe with like a `new_physical` and then linting that with clippy? > > _Originally posted by @hildebrandmw in [#700](https://github.com/microsoft/DiskANN/pull/700/changes#r2722436904)_</issue_description> > > ## Comments on the Issue (you are @copilot in this section) > > <comments> > </comments> > </details> <!-- START COPILOT CODING AGENT SUFFIX --> - Fixes #703 <!-- START COPILOT CODING AGENT TIPS --> --- 💬 We'd love your input! Share your thoughts on Copilot coding agent in our [2 minute survey](https://gh.io/copilot-coding-agent-survey). --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: arrayka <1551741+arrayka@users.noreply.github.com> Co-authored-by: arrayka <arrayka@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 456c94a commit 0a00171

25 files changed

Lines changed: 134 additions & 190 deletions

clippy.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ disallowed-methods = [
33
# We still have `std::fs` references.
44
# { path = "std::fs", reason = "Do not use std::fs directly. Instead, use the FileStorageProvider." },
55
{ path = "vfs::PhysicalFS::new", reason = "Do not use vfs::PhysicalFS in tests. Instead, use the VirtualStorageProvider::new_overlay()." },
6+
{ path = "diskann_providers::storage::virtual_storage_provider::VirtualStorageProvider::new_physical", reason = "Do not use VirtualStorageProvider::new_physical() in tests. Instead, use VirtualStorageProvider::new_overlay()." },
67
# Disallowed methods for the rayon crate to enforce execution within a specified thread pool instead of the global thread pool.
78
{ path = "rayon::iter::ParallelIterator::for_each", reason = "Use `for_each_in_pool` from rayon_utils.rs instead to enforce execution within a specified thread pool."},
89
{ path = "rayon::iter::ParallelIterator::for_each_with", reason = "Use `for_each_with_in_pool` instead to enforce execution within a specified thread pool."},

diskann-disk/src/build/builder/build.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -916,7 +916,6 @@ mod start_point_tests {
916916

917917
use diskann_providers::storage::VirtualStorageProvider;
918918
use diskann_providers::utils::write_metadata;
919-
use vfs::MemoryFS;
920919

921920
use super::*;
922921

@@ -930,8 +929,7 @@ mod start_point_tests {
930929
#[test]
931930
fn test_start_point_save_and_load() {
932931
let file_path = "/start_point_test.bin";
933-
let fs = MemoryFS::new();
934-
let storage_provider = VirtualStorageProvider::new(fs);
932+
let storage_provider = VirtualStorageProvider::new_memory();
935933

936934
// Create and save a start point
937935
let id = 42u32;
@@ -945,7 +943,7 @@ mod start_point_tests {
945943

946944
#[test]
947945
fn test_start_point_load_nonexistent_file() {
948-
let storage_provider = VirtualStorageProvider::new(MemoryFS::new());
946+
let storage_provider = VirtualStorageProvider::new_memory();
949947
let result = StartPoint::load("/nonexistent_file.bin", &storage_provider);
950948
assert_eq!(
951949
result.err().unwrap().kind(),
@@ -956,8 +954,7 @@ mod start_point_tests {
956954
#[test]
957955
fn test_start_point_load_empty_file() {
958956
let file_path = "/empty_file.bin";
959-
let fs = MemoryFS::new();
960-
let storage_provider = VirtualStorageProvider::new(fs);
957+
let storage_provider = VirtualStorageProvider::new_memory();
961958

962959
// Create an empty file
963960
{
@@ -972,8 +969,7 @@ mod start_point_tests {
972969
#[test]
973970
fn test_start_point_load_invalid_data() {
974971
let file_path = "/invalid_data.bin";
975-
let fs = MemoryFS::new();
976-
let storage_provider = VirtualStorageProvider::new(fs);
972+
let storage_provider = VirtualStorageProvider::new_memory();
977973

978974
// Create a file with invalid data
979975
{

diskann-disk/src/storage/cached_reader.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,7 @@ mod cached_reader_test {
130130
0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, 0x00, 0x00, 0x50, 0x41,
131131
0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x11, 0x80, 0x41,
132132
];
133-
let filesystem = MemoryFS::new();
134-
let storage_provider = VirtualStorageProvider::new(filesystem);
133+
let storage_provider = VirtualStorageProvider::new_memory();
135134
{
136135
let mut writer = storage_provider.create_for_write(file_name).unwrap();
137136
writer.write_all(&data).unwrap();
@@ -190,8 +189,7 @@ mod cached_reader_test {
190189
0x20, 0x41, 0x00, 0x00, 0x30, 0x41, 0x00, 0x00, 0x40, 0x41, 0x00, 0x00, 0x50, 0x41,
191190
0x00, 0x00, 0x60, 0x41, 0x00, 0x00, 0x70, 0x41, 0x00, 0x11, 0x80, 0x41,
192191
];
193-
let filesystem = MemoryFS::new();
194-
let storage_provider = VirtualStorageProvider::new(filesystem);
192+
let storage_provider = VirtualStorageProvider::new_memory();
195193
{
196194
let mut writer = storage_provider.create_for_write(file_name).unwrap();
197195
writer.write_all(&data).unwrap();

diskann-disk/src/storage/quant/generator.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ mod generator_tests {
283283
create_thread_pool_for_test, read_metadata, save_bin_f32, save_bytes,
284284
};
285285
use rstest::rstest;
286-
use vfs::{FileSystem, MemoryFS, OverlayFS};
286+
use vfs::{FileSystem, MemoryFS};
287287

288288
use super::*;
289289
use crate::build::chunking::continuation::{
@@ -373,11 +373,12 @@ mod generator_tests {
373373
dim: usize,
374374
offset: usize,
375375
output_dim: u32,
376-
) -> ANNResult<(VirtualStorageProvider<OverlayFS>, String, String)> {
377-
let fs = OverlayFS::new(&[MemoryFS::default().into()]);
378-
fs.create_dir("/test_data")
376+
) -> ANNResult<(VirtualStorageProvider<MemoryFS>, String, String)> {
377+
let storage_provider = VirtualStorageProvider::new_memory();
378+
storage_provider
379+
.filesystem()
380+
.create_dir("/test_data")
379381
.expect("Could not create test directory");
380-
let storage_provider = VirtualStorageProvider::new(fs);
381382

382383
let data_path = "/test_data/test_data.bin".to_string();
383384
let compressed_path = "/test_data/test_compressed.bin".to_string();
@@ -411,10 +412,10 @@ mod generator_tests {
411412
Ok((storage_provider, data_path, compressed_path))
412413
}
413414

414-
fn create_and_call_generator(
415+
fn create_and_call_generator<F: vfs::FileSystem>(
415416
offset: usize,
416417
compressed_path: String,
417-
storage_provider: &VirtualStorageProvider<OverlayFS>,
418+
storage_provider: &VirtualStorageProvider<F>,
418419
data_path: String,
419420
output_dim: u32,
420421
chunking_config: &ChunkingConfig,

diskann-disk/src/storage/quant/pq/pq_generation.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ mod pq_generation_tests {
194194
use diskann_utils::views::{MatrixView, MutMatrixView};
195195
use diskann_vector::distance::Metric;
196196
use rstest::rstest;
197-
use vfs::{FileSystem, MemoryFS, OverlayFS};
197+
use vfs::FileSystem;
198198

199199
use super::{CompressionStage, PQGeneration, PQGenerationContext};
200200
use crate::storage::quant::compressor::QuantCompressor;
@@ -210,9 +210,9 @@ mod pq_generation_tests {
210210
100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32, 100.0f32,
211211
];
212212
#[allow(clippy::too_many_arguments)]
213-
fn create_new_compressor<'a, R: AsThreadPool>(
213+
fn create_new_compressor<'a, R: AsThreadPool, F: vfs::FileSystem>(
214214
stage: CompressionStage,
215-
provider: &'a VirtualStorageProvider<OverlayFS>,
215+
provider: &'a VirtualStorageProvider<F>,
216216
dim: usize,
217217
num_chunks: usize,
218218
max_kmeans_reps: usize,
@@ -222,7 +222,7 @@ mod pq_generation_tests {
222222
pivots_path: String,
223223
compressed_path: String,
224224
data_path: Option<&str>,
225-
) -> Result<PQGeneration<'a, f32, VirtualStorageProvider<OverlayFS>, R>, ANNError> {
225+
) -> Result<PQGeneration<'a, f32, VirtualStorageProvider<F>, R>, ANNError> {
226226
let pq_storage = PQStorage::new(&pivots_path, &compressed_path, data_path);
227227
let context = PQGenerationContext::<'_, _, _> {
228228
pq_storage,
@@ -241,10 +241,11 @@ mod pq_generation_tests {
241241

242242
#[rstest]
243243
fn test_create_and_load_pivots_file() {
244-
let fs = OverlayFS::new(&[MemoryFS::default().into()]);
245-
fs.create_dir("/pq_generation_tests")
244+
let storage_provider = VirtualStorageProvider::new_memory();
245+
storage_provider
246+
.filesystem()
247+
.create_dir("/pq_generation_tests")
246248
.expect("Could not create test directory");
247-
let storage_provider = VirtualStorageProvider::new(fs);
248249

249250
let pivot_file_name = "/pq_generation_tests/generate_pq_pivots_test.bin";
250251
let pivot_file_name_compressor = "/pq_generation_tests/compressor_pivots_test.bin";
@@ -319,13 +320,11 @@ mod pq_generation_tests {
319320

320321
#[rstest]
321322
fn throw_error_for_resume_and_no_existing_file() {
322-
let fs = OverlayFS::new(&[
323-
MemoryFS::default().into(),
324-
// PhysicalFS::new("tests/data/").into(),
325-
]);
326-
fs.create_dir("/pq_generation_tests")
323+
let storage_provider = VirtualStorageProvider::new_memory();
324+
storage_provider
325+
.filesystem()
326+
.create_dir("/pq_generation_tests")
327327
.expect("Could not create test directory");
328-
let storage_provider = VirtualStorageProvider::new(fs);
329328

330329
let pivot_file_name = "/pq_generation_tests/generate_pq_pivots_test.bin";
331330
let compressed_file_name = "/pq_generation_tests/compressed_not_used.bin";

diskann-providers/benches/benchmarks/copy_aligned_data_bench.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use diskann_providers::{
1414
};
1515
use rand::Rng;
1616
use tempfile::TempDir;
17-
use vfs::PhysicalFS;
1817

1918
pub const TEST_DATA_PATH: &str = "test_aligned_data.bin";
2019
pub const BENCHMARK_ID: &str = "copy_aligned_data";
@@ -25,7 +24,7 @@ pub fn benchmark_copy_aligned_data(c: &mut Criterion) {
2524
clippy::disallowed_methods,
2625
reason = "Use physical file system rather than memory for testing the actual disk read/write"
2726
)]
28-
let storage_provider = VirtualStorageProvider::new(PhysicalFS::new(tmp_dir.path()));
27+
let storage_provider = VirtualStorageProvider::new_physical(tmp_dir.path());
2928

3029
let num_points = 1_000_000;
3130
let num_pq_chunks = 192;

diskann-providers/benches/benchmarks_iai/copy_aligned_data_bench_iai.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use diskann_providers::{
1414
use iai_callgrind::black_box;
1515
use rand::Rng;
1616
use tempfile::TempDir;
17-
use vfs::PhysicalFS;
1817

1918
pub const TEST_DATA_PATH: &str = "test_aligned_data.bin";
2019
pub const BENCHMARK_ID: &str = "copy_aligned_data";
@@ -31,7 +30,7 @@ pub fn benchmark_copy_aligned_data_iai() {
3130
clippy::disallowed_methods,
3231
reason = "Use physical file system rather than memory for testing the actual disk read/write"
3332
)]
34-
let storage_provider = VirtualStorageProvider::new(PhysicalFS::new(tmp_dir.path()));
33+
let storage_provider = VirtualStorageProvider::new_physical(tmp_dir.path());
3534

3635
let num_points = 1_000_000;
3736
let num_pq_chunks = 192;

diskann-providers/src/model/graph/provider/async_/fast_memory_quant_vector_provider.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,6 @@ mod tests {
366366
use crate::storage::VirtualStorageProvider;
367367
use diskann::{ANNErrorKind, utils::ONE};
368368
use diskann_vector::{DistanceFunction, PreprocessedDistanceFunction, distance::Metric};
369-
use vfs::MemoryFS;
370369

371370
use super::*;
372371

@@ -522,7 +521,7 @@ mod tests {
522521
fn test_direct_save_load() {
523522
type Provider = FastMemoryQuantVectorProviderAsync;
524523

525-
let storage = VirtualStorageProvider::new(MemoryFS::new());
524+
let storage = VirtualStorageProvider::new_memory();
526525
let provider = create_test_provider();
527526

528527
// Save to disk.
@@ -546,7 +545,7 @@ mod tests {
546545
async fn test_async_save_load() {
547546
type Provider = FastMemoryQuantVectorProviderAsync;
548547

549-
let storage = VirtualStorageProvider::new(MemoryFS::new());
548+
let storage = VirtualStorageProvider::new_memory();
550549
let provider = create_test_provider();
551550

552551
let prefix = "/data.bin";

diskann-providers/src/model/graph/provider/async_/fast_memory_vector_provider.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,6 @@ mod tests {
334334
use crate::storage::VirtualStorageProvider;
335335
use diskann::utils::vecid_from_usize;
336336
use diskann_vector::distance::Metric;
337-
use vfs::MemoryFS;
338337

339338
use super::*;
340339
use crate::test_utils::graph_data_type_utils::GraphDataF32VectorUnitData;
@@ -449,7 +448,7 @@ mod tests {
449448
#[test]
450449
fn test_direct_save_load() {
451450
type Provider = FastMemoryVectorProviderAsync<GraphDataF32VectorUnitData>;
452-
let storage = VirtualStorageProvider::new(MemoryFS::new());
451+
let storage = VirtualStorageProvider::new_memory();
453452
let provider = create_test_provider();
454453

455454
// Save to disk.
@@ -472,7 +471,7 @@ mod tests {
472471
async fn test_async_save() {
473472
type Provider = FastMemoryVectorProviderAsync<GraphDataF32VectorUnitData>;
474473

475-
let storage = VirtualStorageProvider::new(MemoryFS::new());
474+
let storage = VirtualStorageProvider::new_memory();
476475

477476
let provider = create_test_provider();
478477

diskann-providers/src/model/graph/provider/async_/inmem/scalar.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -833,7 +833,6 @@ mod tests {
833833
use diskann_utils::views::MatrixView;
834834
use diskann_vector::distance::Metric;
835835
use rstest::rstest;
836-
use vfs::MemoryFS;
837836

838837
use super::*;
839838

@@ -989,7 +988,7 @@ mod tests {
989988

990989
#[tokio::test]
991990
async fn test_save_with_and_load_with() {
992-
let storage_provider = VirtualStorageProvider::new(MemoryFS::default());
991+
let storage_provider = VirtualStorageProvider::new_memory();
993992
let store = make_store(Metric::InnerProduct);
994993

995994
// Save to our memory provider

0 commit comments

Comments
 (0)