@@ -19,20 +19,62 @@ cleanup() {
19
19
# echo an error message before exiting
20
20
trap ' cleanup' EXIT INT TERM
21
21
22
+ while getopts i:t:d:g:c:p:n:f:b:o:s:h? flag
23
+
24
+ do
25
+ case " ${flag} " in
26
+ i) input_file=${OPTARG}
27
+ ;;
28
+ t) input_type=${OPTARG}
29
+ ;;
30
+ d) data_type=${OPTARG}
31
+ ;;
32
+ g) reference_fasta=${OPTARG}
33
+ ;;
34
+ c) chrom_sizes=${OPTARG}
35
+ ;;
36
+ p) peaks=${OPTARG}
37
+ ;;
38
+ n) nonpeaks=${OPTARG}
39
+ ;;
40
+ f) fold=${OPTARG}
41
+ ;;
42
+ b) bias_threshol=${OPTARG}
43
+ ;;
44
+ o) output_dir=${OPTARG}
45
+ ;;
46
+ s) seed=${OPTARG}
47
+ ;;
48
+ h) echo " script usage: $0 [-i input_file] [-t bam_or_fragment_or_tagalign] [-d ATAC_or_DNASE] [-g genome_fasta] [-c chrom_sizes] [-p peaks_bed] [-n nonpeaks_bed] [-f folds_json] [-b bias_model_h5] [-o output_dir_path]"
49
+ exit
50
+ ;;
51
+ ? ) echo " script usage: $0 [-i input_file] [-t bam_or_fragment_or_tagalign] [-d ATAC_or_DNASE] [-g genome_fasta] [-c chrom_sizes] [-p peaks_bed] [-n nonpeaks_bed] [-f folds_json] [-b bias_model_h5] [-o output_dir_path]"
52
+ exit
53
+ ;;
54
+ * ) echo " Invalid option: -$flag "
55
+ exit 1
56
+ ;;
57
+
58
+ esac
59
+ done
60
+
61
+ input_file=${input_file?param missing - input file path missing - should be bam, fragment or tagalign file}
62
+ input_type=${input_type?param missing - input type missing - should be string with value bam, fragment or tagalign}
63
+ data_type=${data_type?param missing - data_type is ATAC or DNASE}
64
+ reference_fasta=${reference_fasta?param missing - reference genome file missing}
65
+ chrom_sizes=${chrom_sizes?param missing - reference genome chrom sizes file missing}
66
+ peaks=${peaks?param missing - peaks bed file missing}
67
+ nonpeaks=${nonpeaks?param missing - nonpeaks bed file missing}
68
+ fold=${fold?param missing - fold json missing}
69
+ output_dir=${output_dir?param missing - output_dir path missing}
70
+
71
+
22
72
# input files
23
73
24
- in_bam=${1?param missing - in_bam}
25
- data_type=${3?param missing - data_type}
26
- reference_fasta=${4?param missing - reference_fasta}
27
- chrom_sizes=${5?param missing - chrom_sizes}
28
- peaks=${3?param missing - peaks}
29
- nonpeaks=${4?param missing - nonpeaks}
30
- fold=${5?param missing - fold}
31
- output_dir=${7?param missing - output_dir}
74
+ seed=${seed:- 1234} # optional
75
+ bias_threshold_factor=${bias_threshol:- 0.5} # optional
32
76
33
- filters=${8:- 128} # optional
34
- n_dilation_layers=${9:- 4} # optional
35
- seed=${10:- 1234} # optional
77
+ echo $bias_threshold_factor
36
78
37
79
# # output dirs
38
80
@@ -84,15 +126,30 @@ function timestamp {
84
126
logfile=$output_dir /logs/" preprocessing.log"
85
127
touch $logfile
86
128
87
- echo $( timestamp ) : " chrombpnet_makebigwig -g $reference_fasta -ibam $in_bam -c $chrom_sizes -o $bigwig_prefix -d $data_type " | tee -a $logfile
88
- chrombpnet_makebigwig -g $reference_fasta -ibam $in_bam -c $chrom_sizes -o $bigwig_prefix -d $data_type
89
- echo $( timestamp ) : " chrombpnet_pwm_from_bigwig -i $bigwig_prefix_unstranded .bw -g $reference_fasta -o $bigwig_prefix_bias_pwm -c chr20 -cz $chrom_sizes " | tee -a $logfile
129
+ if [ $input_type == " bam" ]
130
+ then
131
+ echo $( timestamp ) : " chrombpnet_makebigwig -g $reference_fasta -ibam $input_file -c $chrom_sizes -o $bigwig_prefix -d $data_type " | tee -a $logfile
132
+ chrombpnet_makebigwig -g $reference_fasta -ibam $input_file -c $chrom_sizes -o $bigwig_prefix -d $data_type
133
+ elif [ $input_type == " fragment" ]
134
+ then
135
+ echo $( timestamp ) : " chrombpnet_makebigwig -g $reference_fasta -ifrag $input_file -c $chrom_sizes -o $bigwig_prefix -d $data_type " | tee -a $logfile
136
+ chrombpnet_makebigwig -g $reference_fasta -ifrag $input_file -c $chrom_sizes -o $bigwig_prefix -d $data_type
137
+ elif [ $input_type == " tagalign" ]
138
+ then
139
+ echo $( timestamp ) : " chrombpnet_makebigwig -g $reference_fasta -itag $input_file -c $chrom_sizes -o $bigwig_prefix -d $data_type " | tee -a $logfile
140
+ chrombpnet_makebigwig -g $reference_fasta -itag $input_file -c $chrom_sizes -o $bigwig_prefix -d $data_type
141
+ else
142
+ echo " Unknown data type: " $input_type
143
+ fi
144
+ echo $( timestamp ) : " chrombpnet_pwm_from_bigwig -i $bigwig_path -g $reference_fasta -o $bigwig_prefix_bias_pwm -c chr20 -cz $chrom_sizes " | tee -a $logfile
90
145
chrombpnet_pwm_from_bigwig -i $bigwig_prefix " _unstranded.bw" -g $reference_fasta -o $output_dir /evaluation/" pwm_from_input" -c " chr20" -cz $chrom_sizes
91
146
92
147
93
148
# defaults
94
149
inputlen=2114
95
150
outputlen=1000
151
+ filters=128
152
+ n_dilation_layers=4
96
153
97
154
function timestamp {
98
155
# Function to get the current time with the new line character
@@ -106,7 +163,6 @@ function timestamp {
106
163
logfile=$output_dir " /logs/train_bias_model.log"
107
164
touch $logfile
108
165
109
- fi
110
166
111
167
# this script does the following -
112
168
# (1) filters your peaks/nonpeaks (removes outliers and removes edge cases and creates a new filtered set)
@@ -206,7 +262,7 @@ function timestamp {
206
262
}
207
263
208
264
209
- shuf --random-source=<( yes 42) -n 30000 $peaks > $output_dir /intermediates/30K.subsample.peaks.bed
265
+ shuf --random-source=<( yes 42) -n 300 $peaks > $output_dir /intermediates/30K.subsample.peaks.bed
210
266
interpret_regions=$output_dir /intermediates/30K.subsample.peaks.bed
211
267
212
268
0 commit comments