Skip to content

Commit 3ce66f0

Browse files
committed
workflow updates to enable input arguments
1 parent b4c1a57 commit 3ce66f0

File tree

2 files changed

+73
-17
lines changed

2 files changed

+73
-17
lines changed

workflows/train_bias_model.sh

+72-16
Original file line number | Diff line number | Diff line change
@@ -19,20 +19,62 @@ cleanup() {
1919
# echo an error message before exiting
2020
trap 'cleanup' EXIT INT TERM
2121

22+
# Print the command-line synopsis for this workflow to stdout.
print_usage() {
  echo "script usage: $0 [-i input_file] [-t bam_or_fragment_or_tagalign] [-d ATAC_or_DNASE] [-g genome_fasta] [-c chrom_sizes] [-p peaks_bed] [-n nonpeaks_bed] [-f folds_json] [-b bias_threshold_factor] [-o output_dir_path] [-s seed]"
}

# Parse the short options into the global variables the rest of the script
# reads (input_file, input_type, data_type, reference_fasta, chrom_sizes,
# peaks, nonpeaks, fold, bias_threshol, output_dir, seed).
# Arguments: the script's command-line arguments ("$@").
# Exits 0 after printing usage for -h (or the legacy -?), and exits 1 with a
# message on stderr for an unknown option or a missing option argument.
parse_flags() {
  local flag
  OPTIND=1
  # Leading ':' selects getopts' silent mode so the two error cases below can
  # be told apart; '?' is not a legal option character and is left out.
  while getopts ':i:t:d:g:c:p:n:f:b:o:s:h' flag; do
    case "${flag}" in
      i) input_file=${OPTARG} ;;
      t) input_type=${OPTARG} ;;
      d) data_type=${OPTARG} ;;
      g) reference_fasta=${OPTARG} ;;
      c) chrom_sizes=${OPTARG} ;;
      p) peaks=${OPTARG} ;;
      n) nonpeaks=${OPTARG} ;;
      f) fold=${OPTARG} ;;
      # Spelling kept as-is: later code reads ${bias_threshol:-0.5}.
      b) bias_threshol=${OPTARG} ;;
      o) output_dir=${OPTARG} ;;
      s) seed=${OPTARG} ;;
      h)
        print_usage
        exit 0
        ;;
      :)
        echo "Option -${OPTARG} requires an argument" >&2
        print_usage >&2
        exit 1
        ;;
      \?)
        # In silent mode OPTARG holds the offending character; a literal
        # "-?" is still honoured as a request for help.
        if [[ "${OPTARG}" == "?" ]]; then
          print_usage
          exit 0
        fi
        echo "Invalid option: -${OPTARG}" >&2
        print_usage >&2
        exit 1
        ;;
    esac
  done
}

parse_flags "$@"
60+
61+
# Required arguments. Each ${var?message} expansion makes the shell abort with
# the given message when the variable is unset, i.e. when the matching option
# was never supplied. Note that the '?' form (without ':') accepts a variable
# that is set to the empty string.
input_file=${input_file?param missing - input file path missing - should be bam, fragment or tagalign file}
62+
input_type=${input_type?param missing - input type missing - should be string with value bam, fragment or tagalign}
63+
data_type=${data_type?param missing - data_type is ATAC or DNASE}
64+
reference_fasta=${reference_fasta?param missing - reference genome file missing}
65+
chrom_sizes=${chrom_sizes?param missing - reference genome chrom sizes file missing}
66+
peaks=${peaks?param missing - peaks bed file missing}
67+
nonpeaks=${nonpeaks?param missing - nonpeaks bed file missing}
68+
fold=${fold?param missing - fold json missing}
69+
output_dir=${output_dir?param missing - output_dir path missing}
70+
71+
2272
# input files
2373

24-
in_bam=${1?param missing - in_bam}
25-
data_type=${3?param missing - data_type}
26-
reference_fasta=${4?param missing - reference_fasta}
27-
chrom_sizes=${5?param missing - chrom_sizes}
28-
peaks=${3?param missing - peaks}
29-
nonpeaks=${4?param missing - nonpeaks}
30-
fold=${5?param missing - fold}
31-
output_dir=${7?param missing - output_dir}
74+
# Optional arguments: ${var:-default} falls back to the default when the
# variable is unset or empty.
seed=${seed:-1234} # optional
75+
# NOTE(review): "bias_threshol" (sic) is the name assigned by the -b option in
# the option parser; the spelling has to stay in sync in both places.
bias_threshold_factor=${bias_threshol:-0.5} # optional
3276

33-
filters=${8:-128} # optional
34-
n_dilation_layers=${9:-4} # optional
35-
seed=${10:-1234} # optional
77+
echo $bias_threshold_factor
3678

3779
## output dirs
3880

@@ -84,15 +126,30 @@ function timestamp {
84126
logfile=$output_dir/logs/"preprocessing.log"
85127
touch $logfile
86128

87-
echo $( timestamp ): "chrombpnet_makebigwig -g $reference_fasta -ibam $in_bam -c $chrom_sizes -o $bigwig_prefix -d $data_type" | tee -a $logfile
88-
chrombpnet_makebigwig -g $reference_fasta -ibam $in_bam -c $chrom_sizes -o $bigwig_prefix -d $data_type
89-
echo $( timestamp ): "chrombpnet_pwm_from_bigwig -i $bigwig_prefix_unstranded.bw -g $reference_fasta -o $bigwig_prefix_bias_pwm -c chr20 -cz $chrom_sizes" | tee -a $logfile
129+
# Build the bigwig from the input reads, then derive a PWM from the unstranded
# bigwig. Reads input_type, input_file, reference_fasta, chrom_sizes,
# bigwig_prefix, data_type, output_dir and logfile, and uses the timestamp
# helper for log prefixes. Every command is echoed to the log before it runs.

# Map the declared input type onto the matching chrombpnet_makebigwig flag.
case "$input_type" in
  bam)      makebigwig_input_flag="-ibam" ;;
  fragment) makebigwig_input_flag="-ifrag" ;;
  tagalign) makebigwig_input_flag="-itag" ;;
  *)
    # No bigwig can be produced for an unrecognised type, and the PWM step
    # below depends on it, so stop instead of continuing.
    echo "Unknown input type: $input_type" >&2
    exit 1
    ;;
esac

echo $( timestamp ): "chrombpnet_makebigwig -g $reference_fasta $makebigwig_input_flag $input_file -c $chrom_sizes -o $bigwig_prefix -d $data_type" | tee -a "$logfile"
chrombpnet_makebigwig -g "$reference_fasta" "$makebigwig_input_flag" "$input_file" -c "$chrom_sizes" -o "$bigwig_prefix" -d "$data_type"

# Name the PWM input/output once so the logged command is exactly the command
# that is executed.
pwm_input_bigwig=$bigwig_prefix"_unstranded.bw"
pwm_output_prefix=$output_dir/evaluation/"pwm_from_input"
echo $( timestamp ): "chrombpnet_pwm_from_bigwig -i $pwm_input_bigwig -g $reference_fasta -o $pwm_output_prefix -c chr20 -cz $chrom_sizes" | tee -a "$logfile"
chrombpnet_pwm_from_bigwig -i "$pwm_input_bigwig" -g "$reference_fasta" -o "$pwm_output_prefix" -c "chr20" -cz "$chrom_sizes"
91146

92147

93148
# defaults
94149
# Fixed model hyper-parameters for this workflow; none of them is exposed as a
# command-line option.
inputlen=2114
95150
outputlen=1000
151+
# filters and n_dilation_layers were previously optional positional arguments
# with these same defaults (128 and 4); they are now hard-coded.
filters=128
152+
n_dilation_layers=4
96153

97154
function timestamp {
98155
# Function to get the current time with the new line character
@@ -106,7 +163,6 @@ function timestamp {
106163
logfile=$output_dir"/logs/train_bias_model.log"
107164
touch $logfile
108165

109-
fi
110166

111167
# this script does the following -
112168
# (1) filters your peaks/nonpeaks (removes outliers and removes edge cases and creates a new filtered set)
@@ -206,7 +262,7 @@ function timestamp {
206262
}
207263

208264

209-
shuf --random-source=<(yes 42) -n 30000 $peaks > $output_dir/intermediates/30K.subsample.peaks.bed
265+
# Subsample the peaks file into the intermediates directory, feeding shuf a
# constant byte stream (yes 42) as its random source.
# NOTE(review): the output file is still named 30K.subsample.peaks.bed, but -n
# is 300 here (the previous revision sampled 30000) — confirm this reduction
# is intended and not a debugging leftover.
shuf --random-source=<(yes 42) -n 300 $peaks > $output_dir/intermediates/30K.subsample.peaks.bed
210266
# The subsampled peaks are the regions used for interpretation downstream.
interpret_regions=$output_dir/intermediates/30K.subsample.peaks.bed
211267

212268

workflows/train_chrombpnet_model.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ then
142142
else
143143
echo "Unknown data type: "$input_type
144144
fi
145-
echo $( timestamp ): "chrombpnet_pwm_from_bigwig -i $bigwig_prefix_unstranded.bw -g $reference_fasta -o $bigwig_prefix_bias_pwm -c chr20 -cz $chrom_sizes" | tee -a $logfile
145+
# Log the PWM command exactly as it is executed on the next line. The earlier
# message referenced $bigwig_path and $bigwig_prefix_bias_pwm, neither of which
# is an argument actually passed to chrombpnet_pwm_from_bigwig.
echo $( timestamp ): "chrombpnet_pwm_from_bigwig -i ${bigwig_prefix}_unstranded.bw -g $reference_fasta -o $output_dir/evaluation/pwm_from_input -c chr20 -cz $chrom_sizes" | tee -a $logfile
chrombpnet_pwm_from_bigwig -i $bigwig_prefix"_unstranded.bw" -g $reference_fasta -o $output_dir/evaluation/"pwm_from_input" -c "chr20" -cz $chrom_sizes
147147

148148

0 commit comments

Comments
 (0)