Skip to content

Commit 50864e9

Browse files
committed
minor
1 parent 33961fe commit 50864e9

4 files changed

Lines changed: 29 additions & 8 deletions

File tree

snapatac2-core/src/preprocessing/import.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ pub fn import_values<A, I>(
318318
anndata: &A,
319319
values: I,
320320
chrom_sizes: &ChromSizes,
321+
white_list: Option<&HashSet<String>>,
321322
chunk_size: usize,
322323
) -> Result<()>
323324
where
@@ -373,6 +374,7 @@ where
373374
let chunked_values = values.chunk_by(|x| x.0.clone());
374375
let chunked_values = chunked_values
375376
.into_iter()
377+
.filter(|(key, _)| white_list.map_or(true, |x| x.contains(key)))
376378
.progress_with(spinner)
377379
.chunks(chunk_size);
378380
let arrays = chunked_values.into_iter().map(|chunk| {

snapatac2-python/python/snapatac2/plotting/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ def tsse(
9494
raise ValueError("TSS enrichment score is not computed, please run `metrics.tsse` first.")
9595

9696
selected_cells = np.where(adata.obs["n_fragment"] >= min_fragment)[0]
97-
x = adata.obs["n_fragment"][selected_cells]
98-
y = adata.obs["tsse"][selected_cells]
97+
x = adata.obs["n_fragment"].to_numpy()[selected_cells]
98+
y = adata.obs["tsse"].to_numpy()[selected_cells]
9999

100100
fig = kde2d(x, y, log_x=True, log_y=False)
101101
fig.update_layout(

snapatac2-python/python/snapatac2/preprocessing/_basic.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ def import_values(
400400
chrom_sizes: Genome | dict[str, int],
401401
*,
402402
file: Path | None = None,
403+
whitelist: Path | list[str] | None = None,
403404
chunk_size: int = 200,
404405
backend: Literal['hdf5'] = 'hdf5',
405406
) -> internal.AnnData:
@@ -410,13 +411,17 @@ def import_values(
410411
----------
411412
input_dir
412413
Directory containing the input files. Each file corresponds to a single cell.
414+
chrom_sizes
415+
A Genome object or a dictionary containing chromosome sizes, for example,
416+
`{"chr1": 2393, "chr2": 2344, ...}`.
413417
file
414418
File name of the output h5ad file used to store the result. If provided,
415419
result will be saved to a backed AnnData, otherwise an in-memory AnnData
416420
is used.
417-
chrom_sizes
418-
A Genome object or a dictionary containing chromosome sizes, for example,
419-
`{"chr1": 2393, "chr2": 2344, ...}`.
421+
whitelist
422+
File name or a list of barcodes. If it is a file name, each line
423+
must contain a valid barcode. When provided, only barcodes in the whitelist
424+
will be retained.
420425
chunk_size
421426
Increasing the chunk_size speeds up I/O but uses more memory.
422427
backend
@@ -433,9 +438,16 @@ def import_values(
433438
if len(chrom_sizes) == 0:
434439
raise ValueError("chrom_size cannot be empty")
435440

441+
if whitelist is not None:
442+
if isinstance(whitelist, str) or isinstance(whitelist, Path):
443+
with open(whitelist, "r") as fl:
444+
whitelist = set([line.strip() for line in fl])
445+
else:
446+
whitelist = set(whitelist)
447+
436448
adata = AnnData() if file is None else internal.AnnData(filename=file, backend=backend)
437449
internal.import_values(
438-
adata, input_dir, chrom_sizes, chunk_size
450+
adata, input_dir, chrom_sizes, chunk_size, whitelist
439451
)
440452
return adata
441453

snapatac2-python/src/preprocessing.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use pyo3::{prelude::*, pybacked::PyBackedStr};
1212
use snapatac2_core::feature_count::ValueType;
1313
use snapatac2_core::preprocessing::SummaryType;
1414
use std::collections::HashMap;
15+
use std::ffi::OsStr;
1516
use std::io::{BufRead, BufReader};
1617
use std::path::PathBuf;
1718
use std::{collections::BTreeMap, collections::HashSet, ops::Deref, str::FromStr};
@@ -239,14 +240,20 @@ pub(crate) fn import_contacts(
239240
}
240241

241242
#[pyfunction]
243+
#[pyo3(signature = (anndata, input_dir, chrom_size, chunk_size, white_list=None))]
242244
pub(crate) fn import_values(
243245
anndata: AnnDataLike,
244246
input_dir: PathBuf,
245247
chrom_size: BTreeMap<String, u64>,
246248
chunk_size: usize,
249+
white_list: Option<HashSet<String>>,
247250
) -> Result<()> {
248251
fn read_chrom_values(path: PathBuf) -> impl Iterator<Item = (String, BaseValue)> {
249-
let barcode = path.file_stem().unwrap().to_str().unwrap().to_string();
252+
let barcode = if path.extension().map_or(false, |ext| ext == "gz") {
253+
<OsStr as AsRef<std::path::Path>>::as_ref(path.file_stem().unwrap()).file_stem()
254+
} else {
255+
path.file_stem()
256+
}.unwrap().to_str().unwrap().to_string();
250257
let reader = BufReader::new(utils::open_file_for_read(&path));
251258
reader.lines().skip(1).map(move |line| {
252259
let line = line.unwrap();
@@ -268,7 +275,7 @@ pub(crate) fn import_values(
268275

269276
macro_rules! run {
270277
($data:expr) => {
271-
preprocessing::import_values($data, sorted_values, &chrom_sizes, chunk_size)?
278+
preprocessing::import_values($data, sorted_values, &chrom_sizes, white_list.as_ref(), chunk_size)?
272279
};
273280
}
274281

0 commit comments

Comments
 (0)