Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tests/data/txt/regions-3col.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
20 1230237 1235237
X 10 10
28 changes: 28 additions & 0 deletions tests/test_bcftools_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,26 @@ def run_vcztools(args: str, expect_error=False) -> tuple[str, str]:
"view --no-version -r '20:1230236-' -i 'FMT/DP>3' -s 'NA00002,NA00003'",
"sample.vcf.gz"
),
(
"view --no-version -R tests/data/txt/regions-3col.tsv -i 'FMT/DP>3' -s 'NA00002,NA00003'", # noqa: E501
"sample.vcf.gz"
),
(
"view --no-version -t '20:1230236-' -i 'FMT/DP>3' -s 'NA00002,NA00003'",
"sample.vcf.gz"
),
(
"view --no-version -t '^20:1230236-' -i 'FMT/DP>3' -s 'NA00002,NA00003'",
"sample.vcf.gz"
),
(
"view --no-version -T tests/data/txt/regions-3col.tsv -i 'FMT/DP>3' -s 'NA00002,NA00003'", # noqa: E501
"sample.vcf.gz"
),
(
"view --no-version -T ^tests/data/txt/regions-3col.tsv -i 'FMT/DP>3' -s 'NA00002,NA00003'", # noqa: E501
"sample.vcf.gz"
),
(
"view --no-version -i 'FILTER=\"VQSRTrancheSNP99.80to100.00\"'",
"1kg_2020_chrM.vcf.gz"
Expand Down Expand Up @@ -239,6 +259,14 @@ def test_vcf_output_with_output_option(tmp_path, args, vcf_file):
r"query -f '[%CHROM %POS %SAMPLE %GT %DP %GQ\n]' -r '20:1230236-' -i 'FMT/DP>3' -S tests/data/txt/samples.txt", # noqa: E501
"sample.vcf.gz",
),
(
r"query -f '[%CHROM %POS %SAMPLE %GT %DP %GQ\n]' -R tests/data/txt/regions-3col.tsv -i 'FMT/DP>3' -s 'NA00002,NA00003'", # noqa: E501
"sample.vcf.gz",
),
(
r"query -f '[%CHROM %POS %SAMPLE %GT %DP %GQ\n]' -T ^tests/data/txt/regions-3col.tsv -i 'FMT/DP>3' -s 'NA00002,NA00003'", # noqa: E501
"sample.vcf.gz",
),
],
)
def test_output(tmp_path, args, vcf_name):
Expand Down
28 changes: 27 additions & 1 deletion tests/test_regions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
from numpy.testing import assert_array_equal

from vcztools.regions import parse_region_string
from vcztools.regions import parse_region_string, parse_regions, parse_targets


@pytest.mark.parametrize(
Expand All @@ -16,3 +17,28 @@ def test_parse_region_string(
targets: str, expected: tuple[str, int | None, int | None]
):
assert parse_region_string(targets) == expected


def test_parse_regions_file():
    """Check that parse_regions reads genomic ranges from a regions file.

    The fixture file names contigs "20" and "X"; the assertions expect them
    as indexes 1 and 2 into the contig list passed in, with each start one
    less than the value written in the file and each end unchanged.
    """
    contig_names = ["19", "20", "X"]
    parsed = parse_regions(
        regions=None,
        all_contigs=contig_names,
        regions_file="tests/data/txt/regions-3col.tsv",
    )
    # Checked in the order contigs, starts, ends.
    expected_by_field = {
        "contigs": [1, 2],
        "starts": [1230236, 9],
        "ends": [1235237, 10],
    }
    for field, expected in expected_by_field.items():
        assert_array_equal(getattr(parsed, field), expected)


def test_parse_targets_file():
    """Check that parse_targets reads ranges from a "^"-prefixed targets file.

    The path is passed with a leading "^". The assertions expect the same
    contig indexes, starts and ends as the file describes, and a truthy
    ``complement`` attribute on the result.
    """
    contig_names = ["19", "20", "X"]
    parsed = parse_targets(
        targets=None,
        all_contigs=contig_names,
        targets_file="^tests/data/txt/regions-3col.tsv",
    )
    # Checked in the order contigs, starts, ends.
    expected_by_field = {
        "contigs": [1, 2],
        "starts": [1230236, 9],
        "ends": [1235237, 10],
    }
    for field, expected in expected_by_field.items():
        assert_array_equal(getattr(parsed, field), expected)
    assert parsed.complement
26 changes: 26 additions & 0 deletions vcztools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ def wrapper(*args, **kwargs):
default=None,
help="Regions to include.",
)
# Shared -R/--regions-file option: an optional string naming a file of
# regions. Applied as a decorator to the query and view commands.
regions_file = click.option(
    "-R",
    "--regions-file",
    help="File of regions to include.",
    default=None,
    type=str,
)
samples = click.option(
"-s",
"--samples",
Expand All @@ -90,6 +97,13 @@ def wrapper(*args, **kwargs):
default=None,
help="Target regions to include.",
)
# Shared -T/--targets-file option: an optional string naming a file of
# target regions. Applied as a decorator to the query and view commands.
targets_file = click.option(
    "-T",
    "--targets-file",
    help="File of target regions to include.",
    default=None,
    type=str,
)
version = click.version_option(version=f"{provenance.__version__}")
zarr_backend_storage = click.option(
"--zarr-backend-storage",
Expand Down Expand Up @@ -159,10 +173,12 @@ def index(path, nrecords, stats, zarr_backend_storage):
default=None,
)
@regions
@regions_file
@force_samples
@samples
@samples_file
@targets
@targets_file
@include
@exclude
@click.option(
Expand All @@ -180,7 +196,9 @@ def query(
list_samples,
format,
regions,
regions_file,
targets,
targets_file,
force_samples,
samples,
samples_file,
Expand Down Expand Up @@ -216,7 +234,9 @@ def query(
output,
query_format=format,
regions=regions,
regions_file=regions_file,
targets=targets,
targets_file=targets_file,
samples=samples,
samples_file=samples_file,
force_samples=force_samples,
Expand Down Expand Up @@ -248,6 +268,7 @@ def query(
help="Do not append version and command line information to the output VCF header.",
)
@regions
@regions_file
@force_samples
@click.option(
"-I",
Expand All @@ -264,6 +285,7 @@ def query(
help="Drop genotypes.",
)
@targets
@targets_file
@include
@exclude
@zarr_backend_storage
Expand All @@ -275,7 +297,9 @@ def view(
no_header,
no_version,
regions,
regions_file,
targets,
targets_file,
force_samples,
no_update,
samples,
Expand Down Expand Up @@ -310,7 +334,9 @@ def view(
no_header=no_header,
no_version=no_version,
regions=regions,
regions_file=regions_file,
targets=targets,
targets_file=targets_file,
no_update=no_update,
samples=samples,
samples_file=samples_file,
Expand Down
14 changes: 13 additions & 1 deletion vcztools/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@
import pyparsing as pp

from vcztools import constants, retrieval
from vcztools.regions import parse_regions, parse_targets
from vcztools.samples import parse_samples
from vcztools.utils import missing, open_zarr, vcf_name_to_vcz_names
from vcztools.utils import (
_as_fixed_length_unicode,
missing,
open_zarr,
vcf_name_to_vcz_names,
)


def list_samples(vcz_path, output, zarr_backend_storage=None):
Expand Down Expand Up @@ -299,7 +305,9 @@ def write_query(
*,
query_format: str,
regions=None,
regions_file=None,
targets=None,
targets_file=None,
samples=None,
samples_file=None,
force_samples: bool = False,
Expand All @@ -320,6 +328,10 @@ def write_query(
contigs = root["contig_id"][:]
filters = root["filter_id"][:]

contigs_u = _as_fixed_length_unicode(root["contig_id"][:]).tolist()
regions = parse_regions(regions, contigs_u, regions_file=regions_file)
targets = parse_targets(targets, contigs_u, targets_file=targets_file)

if "\\n" not in query_format and not disable_automatic_newline:
query_format = query_format + "\\n"

Expand Down
Loading
Loading