Skip to content

Commit 52e5063

Browse files
author
王子叶
committed
add bin for bioconda
1 parent fdd0c0c commit 52e5063

3 files changed

Lines changed: 192 additions & 1 deletion

File tree

COMEBin/comebin_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.0.1'
1+
__version__ = '1.0.2'

COMEBin/run_comebin.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
# Author of pipeline: Ziye Wang.
66
# For questions, bugs, and suggestions, contact me at zwang17@fudan.edu.cn
77
##############################################################################################################################################################
8+
VERSION="1.0.2"
89

910
help_message () {
11+
echo ""
12+
echo "COMEBin version: $VERSION"
1013
echo "Usage: bash run_comebin.sh [options] -a contig_file -o output_dir -p bam_file_path"
1114
echo "Options:"
1215
echo ""
@@ -21,6 +24,11 @@ help_message () {
2124
echo " -b INT batch size for training process (default=1024)"
2225
echo "";}
2326

27+
28+
########################################################################################################
29+
######################## LOADING IN THE PARAMETERS AND RUNNING ########################
30+
########################################################################################################
31+
2432
num_threads=5
2533
n_views=6
2634
#temperature=0.15

bin/run_comebin.sh

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
#!/usr/bin/env bash
2+
3+
##############################################################################################################################################################
4+
# This script runs COMEBin once the BAM files have been obtained.
5+
# Author of pipeline: Ziye Wang.
6+
# For questions, bugs, and suggestions, contact me at zwang17@fudan.edu.cn
7+
##############################################################################################################################################################
8+
VERSION="1.0.2"
9+
10+
# Print the usage text to stdout: a blank line, the COMEBin version banner,
# the invocation synopsis and one line per supported option, followed by a
# closing blank line.
# Globals: VERSION (read).
help_message() {
  printf '%s\n' \
    "" \
    "COMEBin version: $VERSION" \
    "Usage: bash run_comebin.sh [options] -a contig_file -o output_dir -p bam_file_path" \
    "Options:" \
    "" \
    " -a STR metagenomic assembly file" \
    " -o STR output directory" \
    " -p STR path to access to the bam files" \
    " -n INT number of views for contrastive multiple-view learning (default=6)" \
    " -t INT number of threads (default=5)" \
    " -l FLOAT temperature in loss function (default=0.07 for assemblies with an N50 > 10000, default=0.15 for others)" \
    " -e INT embedding size for comebin network (default=2048)" \
    " -c INT embedding size for coverage network (default=2048)" \
    " -b INT batch size for training process (default=1024)" \
    ""
}
26+
27+
# Resolve the directory containing the run_comebin.sh found on PATH and store
# it in run_file_path.
# The lookup is checked directly (rather than through the exit status of a
# nested, unquoted substitution) so a missing script is reported reliably and
# an installation path containing whitespace is not word-split.
if ! run_script=$(command -v run_comebin.sh) || [[ -z "${run_script}" ]]; then
  echo "cannot find run_comebin.sh file - something went wrong with the installation!"
  exit 1
fi

run_file_path=$(dirname -- "${run_script}")
33+
34+
35+
########################################################################################################
36+
######################## LOADING IN THE PARAMETERS AND RUNNING ########################
37+
########################################################################################################
38+
39+
# Default values for the tunable options; each can be overridden on the
# command line.
num_threads=5
n_views=6
# temperature is intentionally left unset here: when -l is not supplied it is
# chosen later in the script from the N50 of the assembly.
emb_szs_forcov=2048
emb_szs=2048
batch_size=1024

# Parse the command-line options into the global option variables.
# Arguments: the script's positional parameters ("$@").
# Globals written: contig_file, output_dir, bam_file_path (resolved with
#   realpath), n_views, num_threads, temperature, emb_szs, emb_szs_forcov,
#   batch_size.
# Exits with status 1, after printing the usage text, on an unknown option or
# a missing option argument.
parse_args() {
  # A local OPTIND lets the function be called more than once.
  local OPTIND=1 OPT
  while getopts a:o:p:n:t:l:e:c:b: OPT; do
    case "${OPT}" in
      # OPTARG is quoted so that paths containing whitespace stay one argument.
      a) contig_file=$(realpath -- "${OPTARG}") ;;
      o) output_dir=$(realpath -- "${OPTARG}") ;;
      p) bam_file_path=$(realpath -- "${OPTARG}") ;;
      n) n_views=${OPTARG} ;;
      t) num_threads=${OPTARG} ;;
      l) temperature=${OPTARG} ;;
      e) emb_szs=${OPTARG} ;;
      c) emb_szs_forcov=${OPTARG} ;;
      b) batch_size=${OPTARG} ;;
      \?)
        # getopts has already reported the offending option on stderr;
        # show the usage text so the user can see the valid ones.
        help_message
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
72+
73+
# Later steps call "python main.py" by relative path, so they only work from
# inside the COMEBin directory; abort instead of continuing from the wrong
# working directory when it cannot be entered.
if ! cd -- "${run_file_path}/COMEBin"; then
  echo "cannot enter ${run_file_path}/COMEBin - something went wrong with the installation!"
  exit 1
fi

# check parameter: -a, -o and -p are mandatory; print the usage text and stop
# when any of them is missing.
if [[ -z "${contig_file}" || -z "${output_dir}" || -z "${bam_file_path}" ]]; then
  help_message
  exit 1
fi
80+
81+
82+
# Print the N50 of the FASTA file $1 to stdout.
# Side effect: writes the sequence lengths, sorted in descending order, to
#   "<$1>_lengths.txt".
# Returns non-zero when $1 is not a readable file or the lengths file cannot
#   be written.
compute_n50() {
  local fasta=$1
  local lengths_file="${fasta}_lengths.txt"
  local total

  [[ -r "${fasta}" && ! -d "${fasta}" ]] || return 1

  # Compute the length of each sequence (header lines start with ">") and
  # sort the lengths from longest to shortest.
  awk '/^>/ {if (seqlen) print seqlen; seqlen=0; next} {seqlen+=length($0)} END {print seqlen}' "${fasta}" \
    | sort -rn > "${lengths_file}" || return 1

  # N50: the length at which the running sum of the sorted lengths first
  # reaches half of the total assembly length.
  total=$(awk '{sum+=$1} END {print sum}' "${lengths_file}")
  awk -v total="${total}" 'BEGIN {cutoff=total/2; current=0} {current+=$1; if (current >= cutoff) {print $1; exit}}' "${lengths_file}"
}

# Print the temperature to use for the N50 value $1: 0.07 when the N50 is
# greater than 10000, 0.15 otherwise. An empty or non-numeric N50 also yields
# 0.15 instead of triggering an integer-comparison error.
select_temperature() {
  local n50=$1
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if [[ "${n50}" =~ ^[0-9]+$ ]] && (( 10#${n50} > 10000 )); then
    echo "0.07"
  else
    echo "0.15"
  fi
}

# Only derive the temperature when the user did not supply one with -l.
if [[ -z "${temperature}" ]]; then
  target_length=$(compute_n50 "${contig_file}") \
    || echo "Warning: could not compute N50 for '${contig_file}'; falling back to the default temperature." >&2

  # N50
  echo "N50: $target_length"
  temperature=$(select_temperature "${target_length}")
fi
echo "Tau(temperature): ${temperature}"
102+
103+
104+
105+
106+
########################################################################################################
107+
###### Get augmentation data
108+
########################################################################################################
109+
folder="${output_dir}/data_augmentation"
keyword="_datacoverage_mean"

# Augmentation is skipped only when the folder already exists and holds
# exactly n_views regular files whose names contain the keyword; in every
# other case it is (re)run.
run_augmentation=1
if [[ -d "${folder}" ]]; then
  echo "${output_dir}/data_augmentation exists."
  count=$(find "${folder}" -maxdepth 1 -type f -name "*${keyword}*" | wc -l)
  echo "Number of files containing '$keyword' in the folder: $count"
  if [[ "${count}" -eq "${n_views}" ]]; then
    echo "No need to run data augmentation."
    run_augmentation=0
  fi
else
  echo "${output_dir}/data_augmentation does not exist."
fi

if (( run_augmentation )); then
  echo "Running data augmentation."
  # The command is tested directly, and every path is quoted so that
  # locations containing whitespace are passed as single arguments.
  if ! python main.py generate_aug_data --contig_file "${contig_file}" \
    --out_augdata_path "${output_dir}/data_augmentation" \
    --n_views "${n_views}" --bam_file_path "${bam_file_path}" --num_threads "${num_threads}"; then
    echo "Something went wrong with running generating augmentation data. Exiting."
    exit 1
  fi
fi
133+
134+
########################################################################################################
135+
###### Get representation (training process)
136+
########################################################################################################
137+
folder="${output_dir}/comebin_res"
keyword="embeddings.tsv"

# Training is skipped only when the result folder already exists and holds
# exactly two regular files whose names contain the keyword; in every other
# case it is (re)run.
run_training=1
if [[ -d "${folder}" ]]; then
  echo "${output_dir}/comebin_res exists."
  count=$(find "${folder}" -maxdepth 1 -type f -name "*${keyword}*" | wc -l)
  echo "Number of files containing '$keyword' in the folder: $count"
  if [[ "${count}" -eq 2 ]]; then
    echo "No need to run getting representation."
    run_training=0
  fi
else
  echo "${output_dir}/comebin_res does not exist."
fi

if (( run_training )); then
  echo "Running getting representation."
  # The command is tested directly, and every path is quoted so that
  # locations containing whitespace are passed as single arguments.
  if ! python main.py train --data "${output_dir}/data_augmentation" \
    --temperature "${temperature}" --emb_szs_forcov "${emb_szs_forcov}" \
    --batch_size "${batch_size}" --emb_szs "${emb_szs}" --n_views "${n_views}" \
    --add_model_for_coverage \
    --output_path "${output_dir}/comebin_res" --earlystop --addvars --vars_sqrt; then
    echo "Something went wrong with running training network. Exiting."
    exit 1
  fi
fi
166+
167+
########################################################################################################
168+
#### Clustering (run Leiden-based clustering methods and get the final result)
169+
########################################################################################################
170+
emb_file="${output_dir}/comebin_res/embeddings.tsv"
seed_file="${contig_file}.bacar_marker.2quarter_lencutoff_1001.seed"

# Each step's exit status is checked on its own, so a failure in "bin" stops
# the script instead of being masked by the status of the following
# "get_result" call.
if ! python main.py bin --contig_file "${contig_file}" \
  --emb_file "${emb_file}" \
  --output_path "${output_dir}/comebin_res" \
  --seed_file "${seed_file}" --num_threads "${num_threads}"; then
  echo "Something went wrong with running clustering. Exiting."
  exit 1
fi

if ! python main.py get_result --contig_file "${contig_file}" \
  --output_path "${output_dir}/comebin_res" \
  --seed_file "${seed_file}" --num_threads "${num_threads}"; then
  echo "Something went wrong with running clustering. Exiting."
  exit 1
fi
183+

0 commit comments

Comments
 (0)