Skip to content

Commit e71db5f

Browse files
committed
small pipeline change to match prepare-blast behavior
1 parent 6066c29 commit e71db5f

File tree

4 files changed

+21
-15
lines changed

4 files changed

+21
-15
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ PROBEst can be run using the following command:
3131

3232
```bash
3333
python pipeline.py \
34-
-i {INPUT} \
35-
-tb {TRUE_BASE} \
36-
-fb [FALSE_BASE ...] \
37-
-c {CONTIG_TABLE} \
38-
-o {OUTPUT}
34+
-i {INPUT} \ # fasta // .fa.gz
35+
-tb {TRUE_BASE} \ # directory with blastn database / fasta files
36+
-fb [FALSE_BASE ...] \ # directories with blastn database / fasta files
37+
-c {CONTIG_TABLE} \ # .tsv table with BLAST database information (optional; defaults to `{OUTPUT}/contigs.tsv` when using FASTA directories).
38+
-o {OUTPUT} \ # output directory
3939
```
4040

4141
**Blastn databases** and the **contig table** are produced by `prep_db.sh` (or built automatically when `-tb` / `-fb` point at **directories of FASTA files**). After each automatic build, the contig table is **deduplicated by contig ID**: when duplicate rows arise from merged FASTAs or repeated runs, the last mapping is kept.

pipeline.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,11 @@ def merge_iter(iter: int):
182182

183183
print("Positive hits counted")
184184

185-
# false bases
186-
for db_neg in args.false_base:
187-
blastn_db = blastn_iter + " -db " + db_neg + \
188-
" >> " + out_dir(iter) + "negative_hits.tsv"
185+
# false bases: first DB uses '>' so negative_hits.tsv is not appended to stale files
186+
neg_path = out_dir(iter) + "negative_hits.tsv"
187+
for i, db_neg in enumerate(args.false_base):
188+
redir = " > " if i == 0 else " >> "
189+
blastn_db = blastn_iter + " -db " + db_neg + redir + neg_path
189190
subprocess.run(blastn_db, shell=True)
190191

191192
print("Negative hits counted")

src/PROBESt/args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def arguments_parse():
7373
parser.add_argument("-c", "--contig_table",
7474
required=False,
7575
default=None,
76-
help="Path to a .tsv table containing BLAST database information. If not provided and FASTA directories are used for bases, it will be auto-generated in the output directory.")
76+
help="Path to the genome/contig .tsv (prep_db.sh -c format). If omitted, defaults to <output>/contigs.tsv. When -tb/-fb are FASTA directories, all bases contribute rows to this file (same as running prep_db.sh with one shared -c).")
7777

7878
parser.add_argument("-o", "--output",
7979
required=True,

tests/test_pipeline_general_integration.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2424
# SOFTWARE.
2525

26-
"""Integration test equivalent to repo root test.sh (general FISH dataset)."""
26+
"""Integration test: general FISH dataset with JIT FASTA bases, AI + visualization."""
2727

2828
import shutil
2929
import subprocess
@@ -42,8 +42,8 @@ def _require_blast_tools():
4242

4343

4444
@pytest.mark.integration
45-
def test_pipeline_general_fish_from_test_sh(tmp_path):
46-
"""Mirror `test.sh` snippet: small general run (AI enabled by default, visualize disabled)."""
45+
def test_integration_fish(tmp_path):
46+
"""Same CLI as: …/test.fna, fasta_base true/false dirs, -N 3, --visualize True --AI True."""
4747
_require_blast_tools()
4848

4949
output = tmp_path / "output"
@@ -67,6 +67,10 @@ def test_pipeline_general_fish_from_test_sh(tmp_path):
6767
"5",
6868
"-N",
6969
"3",
70+
"--visualize",
71+
"True",
72+
"--AI",
73+
"True",
7074
]
7175

7276
result = subprocess.run(
@@ -83,5 +87,6 @@ def test_pipeline_general_fish_from_test_sh(tmp_path):
8387

8488
assert (output / "modeling_results.tsv").is_file()
8589
assert (output / "output_dedegenerated.fa").is_file()
86-
# Visualizations are only created when `--visualize` is enabled.
87-
assert not (output / "visualizations").exists()
90+
viz_dir = output / "visualizations"
91+
assert viz_dir.is_dir()
92+
assert any(viz_dir.glob("*_visualization.png"))

0 commit comments

Comments
 (0)