@@ -400,6 +400,7 @@ def import_values(
400400 chrom_sizes : Genome | dict [str , int ],
401401 * ,
402402 file : Path | None = None ,
403+ whitelist : Path | list [str ] | None = None ,
403404 chunk_size : int = 200 ,
404405 backend : Literal ['hdf5' ] = 'hdf5' ,
405406) -> internal .AnnData :
@@ -410,13 +411,17 @@ def import_values(
410411 ----------
411412 input_dir
412413 Directory containing the input files. Each file corresponds to a single cell.
414+ chrom_sizes
415+ A Genome object or a dictionary containing chromosome sizes, for example,
416+ `{"chr1": 2393, "chr2": 2344, ...}`.
413417 file
414418 File name of the output h5ad file used to store the result. If provided,
415419 result will be saved to a backed AnnData, otherwise an in-memory AnnData
416420 is used.
417- chrom_sizes
418- A Genome object or a dictionary containing chromosome sizes, for example,
419- `{"chr1": 2393, "chr2": 2344, ...}`.
421+ whitelist
422+ File name or a list of barcodes. If it is a file name, each line
423+ must contain a valid barcode. When provided, only barcodes in the whitelist
424+ will be retained.
420425 chunk_size
421426 Increasing the chunk_size speeds up I/O but uses more memory.
422427 backend
@@ -433,9 +438,16 @@ def import_values(
433438 if len (chrom_sizes ) == 0 :
434439 raise ValueError ("chrom_size cannot be empty" )
435440
441+ if whitelist is not None :
442+ if isinstance (whitelist , str ) or isinstance (whitelist , Path ):
443+ with open (whitelist , "r" ) as fl :
444+ whitelist = set ([line .strip () for line in fl ])
445+ else :
446+ whitelist = set (whitelist )
447+
436448 adata = AnnData () if file is None else internal .AnnData (filename = file , backend = backend )
437449 internal .import_values (
438- adata , input_dir , chrom_sizes , chunk_size
450+ adata , input_dir , chrom_sizes , whitelist , chunk_size
439451 )
440452 return adata
441453
0 commit comments