Skip to content

Commit

Permalink
fix: MergeBamAlignment creates invalid BAMs in the presence of second…
Browse files Browse the repository at this point in the history
…ary alignments
  • Loading branch information
DenisVerkhoturov committed Sep 22, 2017
1 parent a731fdb commit 642393c
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
/**
* This strategy was designed for TopHat output, but could be of general utility. It picks the alignment with best MAPQ.
* If paired-end, it is the alignment in which the sum of the MAPQs of both ends is the best. In case of ties, one
* is selected arbitrarily. This strategy expects pair-aware alignments, with the corresponding alignment for each
* mate of the pair correlated by HI (hit index) tag. If the aligner has set a pair of alignments as primary, this
* is selected arbitrarily. If the aligner has set a pair of alignments as primary, this
* is used (assuming one of those alignments is not filtered out). Otherwise the alignment pair with best MapQ is
* selected.
*/
Expand All @@ -49,7 +48,7 @@ public class BestMapqPrimaryAlignmentSelectionStrategy implements PrimaryAlignme
public void pickPrimaryAlignment(final HitsForInsert hits) {

if (hits.numHits() == 0) throw new IllegalArgumentException("No alignments to pick from");
hits.coordinateByHitIndex();
hits.coordinateByMate();
// See if primary alignment is not already unambiguously determined.
final NumPrimaryAlignmentState firstEndAlignmentState = hits.tallyPrimaryAlignments(true);
final NumPrimaryAlignmentState secondEndAlignmentState = hits.tallyPrimaryAlignments(false);
Expand Down
63 changes: 62 additions & 1 deletion src/main/java/picard/sam/HitsForInsert.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;

/**
* Holds all the hits (alignments) for a read or read pair. For single-end reads, all the alignments are
Expand Down Expand Up @@ -217,8 +218,12 @@ public void coordinateByHitIndex() {
secondOfPair.add(i, null);
}
}
}

// Now renumber any correlated alignments, and remove hit index if no correlated read.
/**
* Renumber any correlated alignments, and remove hit index if no correlated read.
*/
private void renumberHitIndex() {
int hi = 0;
for (int i = 0; i < numHits(); ++i) {
final SAMRecord first = getFirstOfPair(i);
Expand All @@ -235,6 +240,62 @@ public void coordinateByHitIndex() {
}
}

/**
* This method lines up alignments in firstOfPairOrFragment and secondOfPair lists so that the first and the second
* records of each pair have the same index.
*/
void coordinateByMate() {
final List<SAMRecord> newFirstOfPairOrFragment = new ArrayList<>();
final List<SAMRecord> newSecondOfPair = new ArrayList<>();

for (int i = 0; i < firstOfPairOrFragment.size(); i++) {
final SAMRecord first = firstOfPairOrFragment.get(i);
newFirstOfPairOrFragment.add(i, first);
newSecondOfPair.add(null);

for (int k = 0; k < secondOfPair.size(); k++) {
final SAMRecord second = secondOfPair.get(k);
if (arePair(first, second)) {
newSecondOfPair.set(i, second);
secondOfPair.set(k, null);
break;
}
}
}

for (int i = 0; i < secondOfPair.size(); i++) {
if (secondOfPair.get(i) != null) {
newFirstOfPairOrFragment.add(i, null);
newSecondOfPair.add(i, secondOfPair.get(i));
}
}

firstOfPairOrFragment.clear();
firstOfPairOrFragment.addAll(newFirstOfPairOrFragment);
secondOfPair.clear();
secondOfPair.addAll(newSecondOfPair);

renumberHitIndex();
}

/**
* Identifies weather records present pairwise alignment or not.
*
* It is unnecessary to check QNAME here cause an object of {@code HitsForInsert}
* presents only records with the same QNAME.
*
* @return {@code true} if records belong to the same pairwise alignment
*/
private static boolean arePair(final SAMRecord first, final SAMRecord second) {
return first != null && second != null
&& first.getMateAlignmentStart() == second.getAlignmentStart()
&& first.getAlignmentStart() == second.getMateAlignmentStart()
&& !SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(first.getMateReferenceName())
&& !SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(second.getMateReferenceName())
&& Objects.equals(first.getReferenceName(), second.getMateReferenceName())
&& Objects.equals(first.getMateReferenceName(), second.getReferenceName());
}

/**
* Determine if there is a single primary alignment in a list of alignments.
* @param records
Expand Down
81 changes: 80 additions & 1 deletion src/test/java/picard/sam/MergeBamAlignmentTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1907,5 +1907,84 @@ public void testHeaderFromMappedBreaks(final String filename) throws IOException
assertSamValid(mergedSam);
IOUtil.assertFilesEqual(expectedSam, mergedSam);
}
}

@Test
public void testPairedReadsAreProcessedProperlyInAbsenceOfHitIndexTags () throws IOException {
final File unmappedSam = new File(TEST_DATA_DIR, "unmapped1.sam");
final File alignedSam = new File(TEST_DATA_DIR, "aligned.sam");
final File expectedSam = new File(TEST_DATA_DIR, "merged_expected.sam");
final File refFasta = new File(TEST_DATA_DIR, "reference.fasta");
final File mergedSam = File.createTempFile("merged", ".sam");
mergedSam.deleteOnExit();

doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam),
null, null, null, null,
false, true, false, 1,
"0", "1.0", "align!", "myAligner",
true, refFasta, mergedSam,
null, null, null, null, true, SAMFileHeader.SortOrder.queryname);

assertSamValid(mergedSam);
IOUtil.assertFilesEqual(expectedSam, mergedSam);
}

@Test
public void testCoordinateByMateMethodWorksProperly () throws IOException {
final HitsForInsert hitsForInsert = new HitsForInsert();
final SAMFileHeader header = new SAMFileHeader();
header.setSortOrder(SAMFileHeader.SortOrder.queryname);

{ // Both first and second of pair
final SAMRecord first = new SAMRecord(header);
final SAMRecord second = new SAMRecord(header);

first.setReferenceName("chrm1");
first.setAlignmentStart(20);
first.setMateReferenceName("chrm2");
first.setMateAlignmentStart(1);
second.setReferenceName("chrm2");
second.setAlignmentStart(1);
second.setMateReferenceName("chrm1");
second.setMateAlignmentStart(20);

hitsForInsert.addFirstOfPairOrFragment(first);
hitsForInsert.addSecondOfPair(second);
}

{ // Only first
final SAMRecord first = new SAMRecord(header);

first.setReferenceName("chrm1");
first.setAlignmentStart(8);
first.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
first.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);

hitsForInsert.addFirstOfPairOrFragment(first);
}

{ // Only Second
final SAMRecord second = new SAMRecord(header);

second.setReferenceName("chrm2");
second.setAlignmentStart(15);
second.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
second.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);

hitsForInsert.addFirstOfPairOrFragment(second);
}

for (int i = 0; i < 3; i++) {
final SAMRecord first = hitsForInsert.getFirstOfPair(i);
final SAMRecord second = hitsForInsert.getSecondOfPair(i);

if (first != null && second != null) {
Assert.assertEquals(first.getMateAlignmentStart(), second.getAlignmentStart());
Assert.assertEquals(second.getMateAlignmentStart(), first.getAlignmentStart());
} else if (first != null) {
Assert.assertEquals(first.getMateAlignmentStart(), SAMRecord.NO_ALIGNMENT_START);
} else {
Assert.assertEquals(second.getMateAlignmentStart(), SAMRecord.NO_ALIGNMENT_START);
}
}
}
}
7 changes: 7 additions & 0 deletions testdata/picard/sam/MergeBamAlignment/aligned.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@SQ SN:chr2 LN:243199373
@SQ SN:chr4 LN:191154276
@SQ SN:chr21 LN:48129895
@RG ID:RG1 PL:Illumina SM:some_sample
example1 113 chr21 10856814 0 29S91M31S chr2 89872071 0 GAACCCGAATAGAATGGAATGGAATGGAATGGAACGGAACGGAATGGAATGGAATGGAATGGAATGGAATGGAACGGAACGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATCAACGCGAGTGCAGGGGAATGGAATGGAATG 0?/112F4HDBFHHFGF4FHGGHHH4HHFGBGF31GHG@3HHHFFBHHEBF5EADFFHHHHHHGHHHGDFHGFFEHHFFEGGHHHHHGGDFHHHHHHHHHHHHHGHHHHHHHHHHHHHHHGGGGGFHHHGGGGGFGFGGFFFFFFFBBBBB NM:i:4 RG:Z:RG1
example1 177 chr2 89872071 25 79S63M9S chr21 10856814 0 GCATTCCATTCTACTCCAGTTAATCCCATTCCATTCTATTCCATTCCACTCCATTCCATTCCATTGCAATGGAGTGGACTCCATTCCATTCCATTCCATTCCGGATGATTCCATTCCATTGCATTCCATTCCATTCCATTCCCCTGCACTC 1B1111B44433B033443B33?B03GB3FHFF33333333B3BBFBB233B3EFBB3BC3GDDF5FHGE23FFA3AF2CBFFDFFF3G3E33DFHGFEFGGHHFEHHFHHGHHGGE5BE5HHFFEC4GFGGFGFEC4E?@BFFFFAABBB NM:i:2 RG:Z:RG1
example1 417 chr4 49644617 6 86S40M25S chr21 10856814 0 GAGTGCAGGGGAATGGAATGGAATGGAATGCAATGGAATGGAATCATCCGGAATGGAATGGAATGGAATGGAGTCCACTCCATTGCAATGGAATGGAATGGAGTGGAATGGAATAGAATGGAATGGGATTAACTGGAGTAGAATGGAATGC BBBAAFFFFB@?E4CEFGFGGFG4CEFFHH5EB5EGGHHGHHFHHEFHHGGFEFGHFD33E3G3FFFDFFBC2FA3AFF32EGHF5FDDG3CB3BBFE3B332BBFBB3B33333333FFHF3BG30B?33B344330B33444B1111B1 NM:i:0 RG:Z:RG1
9 changes: 9 additions & 0 deletions testdata/picard/sam/MergeBamAlignment/merged_expected.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@HD VN:1.5 SO:queryname
@SQ SN:chr2 LN:243199373
@SQ SN:chr4 LN:191154276
@SQ SN:chr21 LN:48129895
@RG ID:RG1 PL:Illumina SM:some_sample
@PG ID:0 VN:1.0 CL:align! PN:myAligner
example1 113 chr21 10856814 0 29S91M31S chr2 89872071 0 GAACCCGAATAGAATGGAATGGAATGGAATGGAACGGAACGGAATGGAATGGAATGGAATGGAATGGAATGGAACGGAACGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATCAACGCGAGTGCAGGGGAATGGAATGGAATG 0?/112F4HDBFHHFGF4FHGGHHH4HHFGBGF31GHG@3HHHFFBHHEBF5EADFFHHHHHHGHHHGDFHGFFEHHFFEGGHHHHHGGDFHHHHHHHHHHHHHGHHHHHHHHHHHHHHHGGGGGFHHHGGGGGFGFGGFFFFFFFBBBBB MC:Z:79S63M9S PG:Z:0 RG:Z:RG1 HI:i:0 NM:i:4 MQ:i:25
example1 177 chr2 89872071 25 79S63M9S chr21 10856814 0 GCATTCCATTCTACTCCAGTTAATCCCATTCCATTCTATTCCATTCCACTCCATTCCATTCCATTGCAATGGAGTGGACTCCATTCCATTCCATTCCATTCCGGATGATTCCATTCCATTGCATTCCATTCCATTCCATTCCCCTGCACTC 1B1111B44433B033443B33?B03GB3FHFF33333333B3BBFBB233B3EFBB3BC3GDDF5FHGE23FFA3AF2CBFFDFFF3G3E33DFHGFEFGGHHFEHHFHHGHHGGE5BE5HHFFEC4GFGGFGFEC4E?@BFFFFAABBB MC:Z:29S91M31S PG:Z:0 RG:Z:RG1 HI:i:0 NM:i:2 MQ:i:0
example1 393 chr4 49644617 6 86S40M25S = 49644617 0 GAGTGCAGGGGAATGGAATGGAATGGAATGCAATGGAATGGAATCATCCGGAATGGAATGGAATGGAATGGAGTCCACTCCATTGCAATGGAATGGAATGGAGTGGAATGGAATAGAATGGAATGGGATTAACTGGAGTAGAATGGAATGC BBBAAFFFFB@?E4CEFGFGGFG4CEFFHH5EB5EGGHHGHHFHHEFHHGGFEFGHFD33E3G3FFFDFFBC2FA3AFF32EGHF5FDDG3CB3BBFE3B332BBFBB3B33333333FFHF3BG30B?33B344330B33444B1111B1 PG:Z:0 RG:Z:RG1 NM:i:0
4 changes: 4 additions & 0 deletions testdata/picard/sam/MergeBamAlignment/reference.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@HD VN:1.0 SO:unsorted
@SQ SN:chr2 LN:243199373
@SQ SN:chr4 LN:191154276
@SQ SN:chr21 LN:48129895
1 change: 1 addition & 0 deletions testdata/picard/sam/MergeBamAlignment/reference.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
; picard.sam.MergeBamAlignmentTest.testPairedReadsAreProcessedProperlyInAbsenceOfHitIndexTags
4 changes: 4 additions & 0 deletions testdata/picard/sam/MergeBamAlignment/unmapped1.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@HD VN:1.5 SO:queryname
@RG ID:RG1 PL:Illumina SM:some_sample
example1 77 * 0 0 * * 0 0 CATTCCATTCCATTCCCCTGCACTCGCGTTGATTCCATTCCATTCCATTCCATTCCATTCCATTCCATTCCGTTCCGTTCCATTCCATTCCATTCCATTCCATTCCATTCCGTTCCGTTCCATTCCATTCCATTCCATTCTATTCGGGTTC BBBBBFFFFFFFGGFGFGGGGGHHHFGGGGGHHHHHHHHHHHHHHHGHHHHHHHHHHHHHFDGGHHHHHGGEFFHHEFFGHFDGHHHGHHHHHHFFDAE5FBEHHBFFHHH3@GHG13FGBGFHH4HHHGGHF4FGFHHFBDH4F211/?0 RG:Z:RG1
example1 141 * 0 0 * * 0 0 GAGTGCAGGGGAATGGAATGGAATGGAATGCAATGGAATGGAATCATCCGGAATGGAATGGAATGGAATGGAGTCCACTCCATTGCAATGGAATGGAATGGAGTGGAATGGAATAGAATGGAATGGGATTAACTGGAGTAGAATGGAATGC BBBAAFFFFB@?E4CEFGFGGFG4CEFFHH5EB5EGGHHGHHFHHEFHHGGFEFGHFD33E3G3FFFDFFBC2FA3AFF32EGHF5FDDG3CB3BBFE3B332BBFBB3B33333333FFHF3BG30B?33B344330B33444B1111B1 RG:Z:RG1

0 comments on commit 642393c

Please sign in to comment.