@@ -19,6 +19,8 @@ ILLUMINA_XDROP=100
# Hunk summary: adds one "minimum exact match" constant per sequencing
# technology next to the existing X-drop and seed-length constants.
# The stray spaces inside words below (e.g. "$BASENAME .sim") look like an
# artifact of how this diff was rendered -- verify against the real script.
19
19
PACBIO_XDROP=27
20
20
ILLUMINA_SEED_LENGTH=10000
21
21
PACBIO_SEED_LENGTH=19
22
# Added: copied into MIN_EXACT_MATCH in the PacBio (else) branch further down.
+ PACBIO_MIN_EXACT_MATCH=0.02
23
# Added: copied into MIN_EXACT_MATCH in the "illumina" branch further down.
+ ILLUMINA_MIN_EXACT_MATCH=0.7
22
24
23
25
echo " Simulating Illumina reads"
24
26
# Runs art_illumina on $INFILE with read length $ILLUMINA_LENGTH, output prefix
# built from $BASENAME, and $SEED passed via -rs.
# NOTE(review): every expansion on this line is unquoted, so a value containing
# whitespace or glob characters would be word-split -- confirm inputs are safe.
art_illumina -ss HS25 -i $INFILE -p -c 1280 -l $ILLUMINA_LENGTH -m 200 -s 10 -o $BASENAME .sim -rs $SEED -sam
@@ -28,8 +30,8 @@ bedtools getfasta -fi $BASENAME.fa -bed <(bedtools bamtobed -i $BASENAME.sim.bam
# Hunk summary: changes the PBSIM_DEPTH assignments used by the pbsim call below.
28
30
# Converts the simulated Illumina FASTQ to FASTA via reformat.sh.
reformat.sh in=$BASENAME .sim.illumina.fq out=$BASENAME .sim.illumina.fa fastawrap=-1 overwrite=true
29
31
30
32
echo " Simulating PacBio reads"
31
# Removed: only the first assignment was active; the second was commented out.
- PBSIM_DEPTH=0.0504 # chr22
32
- # PBSIM_DEPTH=0.4284 # E. coli
33
# NOTE(review): after this change BOTH assignments are live, so the second one
# (0.4284) overrides the first and is the value pbsim receives via --depth.
# The first assignment is effectively dead -- confirm this is intended.
+ PBSIM_DEPTH=0.0504 # illumina
34
+ PBSIM_DEPTH=0.4284 # pacbio
33
35
# Runs pbsim with mean/min/max read length all set to $PACBIO_LENGTH and sd 0.
# NOTE(review): "--data-type CLR" appears twice on this command line.
../pbsim --data-type CLR --depth $PBSIM_DEPTH --model_qc ../model_qc_clr --data-type CLR --length-mean $PACBIO_LENGTH --length-sd 0 --length-min $PACBIO_LENGTH --length-max $PACBIO_LENGTH $BASENAME .fa --prefix $BASENAME --seed $SEED
34
36
# Joins each group of 4 FASTQ lines into one record, drops records whose 2nd
# awk field matches /N/, shuffles using ../seed as the randomness source, then
# uses tr to turn the joined record back into separate lines.
cat ${BASENAME} _0001.fastq | paste - - - - | awk ' !($2~/N/)' | shuf --random-source=../seed | tr " \t" " \n" > $BASENAME .sim.pacbio.fq
35
37
# Same join/filter/shuffle/split treatment for the MAF output; here the filter
# tests the second-to-last field ($(NF-1)). Both shuffles read the same
# ../seed file -- presumably so FASTQ and MAF stay in matching order; confirm.
cat ${BASENAME} _0001.maf | paste - - - - | awk -F" \t" ' !($(NF-1)~/N/)' | shuf --random-source=../seed | tr " \t" " \n" > $BASENAME .sim.maf
@@ -41,10 +43,12 @@ if [[ $a == "illumina" ]]; then
# Hunk summary: inside the if/else on $a (condition shown in the hunk header),
# each branch now also sets MIN_EXACT_MATCH from its technology constant.
41
43
LENGTH=$ILLUMINA_LENGTH
42
44
# NOTE(review): XDROP is set in both branches, but the coordinate-alignment
# command visible later in this diff passes a literal "--align-xdrop 100".
XDROP=$ILLUMINA_XDROP
43
45
SEED_LENGTH=$ILLUMINA_SEED_LENGTH
46
# Added: Illumina value for --align-min-exact-match.
+ MIN_EXACT_MATCH=$ILLUMINA_MIN_EXACT_MATCH
44
47
else
45
48
LENGTH=$PACBIO_LENGTH
46
49
XDROP=$PACBIO_XDROP
47
50
SEED_LENGTH=$PACBIO_SEED_LENGTH
51
# Added: value used for any $a other than "illumina" (the PacBio constants).
+ MIN_EXACT_MATCH=$PACBIO_MIN_EXACT_MATCH
48
52
fi
49
53
50
54
echo " Aligning to whole graph"
@@ -54,7 +58,7 @@ echo " parasail"
# Hunk summary: the coordinate-aware $METAGRAPH align call gains
# "--align-min-exact-match $MIN_EXACT_MATCH".
54
58
# Runs ../run_parasail.sh on the whole-graph alignment output, saving SAM results.
../run_parasail.sh $BASENAME $a $LENGTH metagraph_base.out > $BASENAME .sim.$a .metagraph_base.out.results.sam
55
59
56
60
echo " Aligning by coords"
57
- /usr/bin/time -v $METAGRAPH align $FLAGS --align-chain --align-max-seed-length $SEED_LENGTH --align-xdrop 100 -i $BASENAME .dbg $BASENAME .sim.$a .fq -a $BASENAME .${COORD_TYPE} _coord.annodbg > $BASENAME .sim.$a .metagraph_coord.out 2> $BASENAME .sim.$a .metagraph_coord.log
61
# Alignment stdout goes to *.metagraph_coord.out; stderr (including the
# /usr/bin/time -v report) goes to *.metagraph_coord.log.
# NOTE(review): the added line shows "--align- max-seed-length" with a space,
# whereas the removed line has "--align-max-seed-length". If that space exists
# in the real script (and is not a rendering artifact), the option is split
# into two words -- verify.
# NOTE(review): --align-xdrop is the literal 100 although XDROP is assigned per
# technology earlier in this script -- confirm whether $XDROP was intended.
+ /usr/bin/time -v $METAGRAPH align $FLAGS --align-chain --align-min-exact-match $MIN_EXACT_MATCH --align- max-seed-length $SEED_LENGTH --align-xdrop 100 -i $BASENAME .dbg $BASENAME .sim.$a .fq -a $BASENAME .${COORD_TYPE} _coord.annodbg > $BASENAME .sim.$a .metagraph_coord.out 2> $BASENAME .sim.$a .metagraph_coord.log
58
62
# Writes field 4 of every alignment record as a FASTA entry with header ">1".
awk -F" \t" ' {print ">1\n"$4}' $BASENAME .sim.$a .metagraph_coord.out > $BASENAME .sim.$a .metagraph_coord.out.fa
59
63
echo " parasail"
60
64
../run_parasail.sh $BASENAME $a $LENGTH metagraph_coord.out > $BASENAME .sim.$a .metagraph_coord.out.results.sam
@@ -84,5 +88,12 @@ bedtools getfasta -fi $BASENAME.fa -bed <(bedtools bamtobed -i $BASENAME.sim.$a.
# Hunk summary: appends a GraphAligner stage (align, pick one hit per read,
# extract the matching reference sequence, run parasail) before the loop's "done".
84
88
echo " parasail"
85
89
../run_parasail.sh $BASENAME $a $LENGTH pufferfish.sam > $BASENAME .sim.$a .pufferfish.sam.results.sam
86
90
91
+ echo " GraphAligner"
92
# Runs GraphAligner on $BASENAME.gfa with the simulated reads; alignments are
# written to temp.$a.gaf, and stdout+stderr (including the /usr/bin/time -v
# report) go to the *.graphaligner.log file.
# NOTE(review): temp.$a.gaf is a fixed name in the working directory --
# concurrent runs in the same directory would overwrite each other; confirm.
+ /usr/bin/time -v GraphAligner -g $BASENAME .gfa -f $BASENAME .sim.$a .fa -x vg -a temp.$a .gaf > $BASENAME .sim.$a .graphaligner.log 2>&1
93
# For every read (FASTA lines paired by "paste - -", header taken with cut -f1,
# ">" stripped by tr) emits the first grep -F match from temp.$a.gaf. When grep
# finds nothing, echo still prints an empty line, so output stays one line per
# read; the awk stage below handles those empty lines via NF==0.
# NOTE(review): this rescans the whole GAF once per read and uses "read"
# without -r -- acceptable for small inputs; confirm for large ones.
+ cat $BASENAME .sim.$a .fa | paste - - | cut -f1 | tr -d " >" | while read F; do echo " $( grep -F $F " " temp.$a .gaf) " | head -n 1; done > $BASENAME .sim.$a .graphaligner.gaf
94
# awk builds one samtools faidx region per line: a placeholder "<BASENAME>:1-1"
# for empty lines, otherwise a region computed from fields 2, 3, 4, 7, 8 and 9,
# with a different formula depending on whether field 6 equals ">s1".
# Presumably ">s1" is the forward orientation of a single-node path and the
# else branch the reverse orientation -- confirm against the GAF spec and graph.
# The fetched sequences are then re-emitted as ">1" FASTA entries with "N"
# characters deleted by tr.
+ samtools faidx $BASENAME .fa $( awk -F" \t" ' {if (NF==0){printf("' $BASENAME ' :1-1 ")} else {left=$3; right=$2-$4; if ($6 == ">s1") { printf("' $BASENAME ' :%d-%d ",$8+1-left,$9+right)} else {printf("' $BASENAME ' :%d-%d ",$7-$9+1-right,$7-$8+left)} }}' $BASENAME .sim.$a .graphaligner.gaf) -n 100000 | paste - - | awk ' {print ">1\n"$2}' | tr -d " N" > $BASENAME .sim.$a .graphaligner.gaf.fa
95
+ echo " parasail"
96
# Runs parasail on the GraphAligner result, mirroring the other aligner stages.
+ ../run_parasail.sh $BASENAME $a $LENGTH graphaligner.gaf > $BASENAME .sim.$a .graphaligner.gaf.results.sam
97
+
87
98
# Closes a loop whose opening line is outside the visible hunks.
done
88
99
0 commit comments