Skip to content

Commit 210ea34

Browse files
Merge branch 'feat/multiref' into fix/cva16-build-files
2 parents b8d2fec + f40ac06 commit 210ea34

File tree

1,089 files changed

+1443807
-131360
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,089 files changed

+1443807
-131360
lines changed

.github/workflows/build-and-deploy.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,14 @@ name: build-and-deploy
22

33
on:
44
push:
5-
branches:
6-
- master
7-
- staging
8-
- release
95
pull_request:
106
repository_dispatch:
117
types: build-and-deploy
128
workflow_dispatch:
139
workflow_call:
1410

1511
concurrency:
16-
group: cli-${{ github.workflow }}-${{ github.ref_type }}-${{ github.event.pull_request.number || github.ref || github.run_id }}
12+
group: cli-${{ github.workflow }}-${{ github.ref_type }}-${{ github.head_ref || github.ref_name || github.run_id }}
1713
cancel-in-progress: true
1814

1915
defaults:

.github/workflows/validate-pr.yml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
name: "validate-pr"
2+
3+
on:
4+
pull_request:
5+
6+
permissions:
7+
contents: "read"
8+
9+
concurrency:
10+
group: "validate-pr-${{ github.event.pull_request.number || github.run_id }}"
11+
cancel-in-progress: true
12+
13+
defaults:
14+
run:
15+
shell: "bash -euxo pipefail {0}"
16+
17+
env:
18+
PYTHONUNBUFFERED: "1"
19+
20+
jobs:
21+
validate:
22+
runs-on: "ubuntu-24.04"
23+
24+
steps:
25+
- name: "Checkout PR code"
26+
uses: "actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683" # v4.2.2
27+
with:
28+
repository: "${{ github.event.pull_request.head.repo.full_name }}"
29+
ref: "${{ github.event.pull_request.head.ref }}"
30+
fetch-depth: 0
31+
fetch-tags: true
32+
33+
- name: "Install system dependencies"
34+
run: "sudo apt-get install genometools python3 --yes -qq >/dev/null"
35+
36+
- name: "Install Python dependencies"
37+
run: "pip3 install -r requirements.txt"
38+
39+
- name: "Validate GFF files"
40+
run: "./scripts/validate-gff 'data/'"
41+
42+
- name: "Rebuild (validation only)"
43+
id: "rebuild"
44+
continue-on-error: true
45+
run: "./scripts/rebuild --input-dir 'data/' --output-dir 'data_output/' --no-pull --allow-dirty"
46+
47+
- name: "Upload validation artifacts"
48+
if: "always()"
49+
uses: "actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02" # v4.6.2
50+
with:
51+
name: "validation-output"
52+
path: "data_output/"
53+
54+
- name: "Fail if rebuild or validation found errors"
55+
if: "steps.rebuild.outcome == 'failure'"
56+
run: "exit 1"

data/community/collection.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"community/pathoplexus/cchfv/M",
3838
"community/v-gen-lab/chikV/genotypes",
3939
"community/masphl-bioinformatics/hav/vp1-2b-junction",
40-
"community/masphl-bioinformatics/hav/whole-genome"
40+
"community/masphl-bioinformatics/hav/whole-genome",
41+
"community/itps/zikav"
4142
]
4243
}

data/community/isuvdl/mazeller/prrsv1/orf5/yimim2025/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## Unreleased
2+
3+
- Remove invalid `qc.missingData.scoreWeight` and `qc.mixedSites.scoreWeight`
4+
15
## 2025-09-09T12:13:13Z
26

37
Add schema definition url to `pathogen.json`. This is a purely technical change, for convenience of dataset authors. The data itself is not modified.

data/community/isuvdl/mazeller/prrsv1/orf5/yimim2025/pathogen.json

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@
2727
"missingData": {
2828
"enabled": true,
2929
"missingDataThreshold": 2000,
30-
"scoreBias": 500,
31-
"scoreWeight": 50
30+
"scoreBias": 500
3231
},
3332
"snpClusters": {
3433
"enabled": true,
@@ -38,8 +37,7 @@
3837
},
3938
"mixedSites": {
4039
"enabled": true,
41-
"mixedSitesThreshold": 15,
42-
"scoreWeight": 80
40+
"mixedSitesThreshold": 15
4341
},
4442
"frameShifts": {
4543
"enabled": true,

data/community/isuvdl/mazeller/prrsv2/orf5/yimim2023/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## Unreleased
2+
3+
- Unify attributes per schema update
4+
15
## 2025-09-09T12:13:13Z
26

37
Add schema definition url to `pathogen.json`. This is a purely technical change, for convenience of dataset authors. The data itself is not modified.

data/community/isuvdl/mazeller/prrsv2/orf5/yimim2023/pathogen.json

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,13 @@
33
"attributes": {
44
"name": "PRRSV-2 ORF5 Lineages, Yim-im & Zhang 2023 Microbiol Spectr",
55
"reference accession": "DQ478308.1",
6-
"reference name": "PRRSV0004437"
6+
"reference name": "PRRSV0004437",
7+
"experimental": true
78
},
89
"compatibility": {
910
"cli": "3.0.0-alpha.0",
1011
"web": "3.0.0-alpha.0"
1112
},
12-
"deprecated": false,
13-
"enabled": true,
14-
"experimental": true,
1513
"files": {
1614
"changelog": "CHANGELOG.md",
1715
"examples": "sequences.fasta",
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
## Unreleased
2+
3+
- Move `placementMaskRanges` to tree.json
4+
- Unify attributes per schema update
5+
6+
## 2026-03-04T12:40:26Z
7+
8+
Initial release of a Zika Virus (zikav) dataset for genotype classification!
9+
10+
Read more about Nextclade datasets in the documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Nextclade Dataset for Zika virus Based on Reference "Natal RGN"
2+
3+
## Dataset Attributes
4+
5+
| Attribute | Value |
6+
| -------------------- | ---------------------------------------- |
7+
| Name | Zika virus |
8+
| RefName | Natal RGN |
9+
| RefAccession | NC_035889.1 |
10+
11+
## Scope of This Dataset
12+
13+
The dataset aims to enable rapid and accurate identification of the two main Zika virus genotypes, improving the resolution and speed of phylogenetic classification in routine surveillance networks.
14+
15+
The source code is available at [InstitutoTodosPelaSaude/nextclade-datasets-workflows](https://github.com/InstitutoTodosPelaSaude/nextclade-datasets-workflows/tree/main/zikav).
16+
17+
For bugs, please open an [issue](https://github.com/InstitutoTodosPelaSaude/nextclade-datasets-workflows/issues).
18+
19+
Read more about Nextclade datasets in the Nextclade documentation: [Nextclade Datasets](https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html).
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
##gff-version 3
2+
#!gff-spec-version 1.21
3+
#!processor NCBI annotwriter
4+
##sequence-region NC_035889.1 1 10808
5+
##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=64320
6+
NC_035889.1 RefSeq region 1 10808 . + . ID=NC_035889.1:1..10808;Dbxref=taxon:64320;collection-date=2015;country=Brazil: Rio Grande do Norte%2C Natal;gbkey=Src;genome=genomic;isolation-source=fetus' brain autopsy;mol_type=genomic RNA;nat-host=Homo sapiens;strain=Natal RGN
7+
NC_035889.1 RefSeq region 1 10808 . + . ID=id-NC_035889.1:1..10808;Note=Mature peptides were annotated by RefSeq staff using homoloy to NC_012532.1 and the cleavage sites reported in Kuno and Chang%2C 2007 (PMID 17195954). Questions about the annotation of this sequence should be directed to info@ncbi.nlm.nih.gov.;gbkey=Comment
8+
NC_035889.1 GenBank CDS 108 473 . + . gene_name=ancC
9+
NC_035889.1 GenBank CDS 108 419 . + . gene_name=C
10+
NC_035889.1 GenBank CDS 474 977 . + . gene_name=prM
11+
NC_035889.1 GenBank CDS 474 752 . + . gene_name=pr
12+
NC_035889.1 GenBank CDS 753 977 . + . gene_name=M
13+
NC_035889.1 GenBank CDS 978 2489 . + . gene_name=E
14+
NC_035889.1 GenBank CDS 2490 3545 . + . gene_name=NS1
15+
NC_035889.1 GenBank CDS 3546 4223 . + . gene_name=NS2A
16+
NC_035889.1 GenBank CDS 4224 4613 . + . gene_name=NS2B
17+
NC_035889.1 GenBank CDS 4614 6464 . + . gene_name=NS3
18+
NC_035889.1 GenBank CDS 6465 6845 . + . gene_name=NS4A
19+
NC_035889.1 GenBank CDS 6846 6914 . + . gene_name=2K
20+
NC_035889.1 GenBank CDS 6915 7667 . + . gene_name=NS4B
21+
NC_035889.1 GenBank CDS 7668 10376 . + . gene_name=NS5

0 commit comments

Comments
 (0)