diff --git a/src/main/java/picard/nio/PicardHtsPath.java b/src/main/java/picard/nio/PicardHtsPath.java index 3c22025088..b17f1fb48c 100644 --- a/src/main/java/picard/nio/PicardHtsPath.java +++ b/src/main/java/picard/nio/PicardHtsPath.java @@ -62,6 +62,10 @@ public PicardHtsPath(final String rawInputString) { super(rawInputString); } + public PicardHtsPath(final String directory, final String file) { + super(directory + file); + } + /** * Create a PicardHtsPath from an existing {@link HtsPath} or subclass. * @@ -179,10 +183,26 @@ public static PicardHtsPath replaceExtension(final IOPath path, final String new return PicardHtsPath.fromPath(path.toPath().resolveSibling(newFileName)); } + public static PicardHtsPath replaceExtension(final IOPath path, final String newExtension){ + return replaceExtension(path, newExtension, false); + } + + /** * Wrapper for Path.resolve() */ public static PicardHtsPath resolve(final PicardHtsPath absPath, final String relativePath){ return PicardHtsPath.fromPath(absPath.toPath().resolve(relativePath)); } + + /** + * Wrapper for Path.resolveSibling() + */ + public static PicardHtsPath resolveSibling(final PicardHtsPath absPath, final String other){ + return PicardHtsPath.fromPath(absPath.toPath().resolveSibling(other)); + } + + public boolean isLocalPath(){ + return getScheme().equals(PicardBucketUtils.FILE_SCHEME); + } } diff --git a/src/main/java/picard/sam/BuildBamIndex.java b/src/main/java/picard/sam/BuildBamIndex.java index 5141688328..4a1224d675 100755 --- a/src/main/java/picard/sam/BuildBamIndex.java +++ b/src/main/java/picard/sam/BuildBamIndex.java @@ -25,24 +25,32 @@ package picard.sam; import htsjdk.samtools.BAMIndexer; +import htsjdk.samtools.CRAMCRAIIndexer; +import htsjdk.samtools.CRAMIndexer; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.seekablestream.SeekablePathStream; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; +import htsjdk.utils.ValidationUtils; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; +import picard.PicardException; import picard.cmdline.CommandLineProgram; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; import picard.nio.PicardHtsPath; import java.io.File; +import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; /** @@ -69,13 +77,13 @@ public class BuildBamIndex extends CommandLineProgram { private static final Log log = Log.getInstance(BuildBamIndex.class); @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, - doc = "A BAM file or GA4GH URL to process. Must be sorted in coordinate order.") + doc = "A BAM file or GA4GH URL to process. Must be sorted in coordinate order.") // tsato: Is the GA4GH bit accurate? public PicardHtsPath INPUT; @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The BAM index file. Defaults to x.bai if INPUT is x.bam, otherwise INPUT.bai.\n" + "If INPUT is a URL and OUTPUT is unspecified, defaults to a file in the current directory.", optional = true) - public File OUTPUT; + public PicardHtsPath OUTPUT; /** * Main method for the program. Checks that all input files are present and @@ -83,44 +91,36 @@ public class BuildBamIndex extends CommandLineProgram { * all the records generating a BAM Index, then writes the bai file. */ protected int doWork() { - final Path inputPath = INPUT.toPath(); + ValidationUtils.validateArg(INPUT.hasExtension(FileExtensions.BAM) || INPUT.hasExtension(FileExtensions.CRAM), + "only BAM and CRAM files are supported. INPUT = " + INPUT); - // set default output file - input-file.bai - if (OUTPUT == null) { - - final String baseFileName = inputPath.getFileName().toString(); - - // only BAI indices can be created for now, although CSI indices can be read as well - if (baseFileName.endsWith(FileExtensions.BAM)) { - - final int index = baseFileName.lastIndexOf('.'); - OUTPUT = new File(baseFileName.substring(0, index) + FileExtensions.BAI_INDEX); - - } else { - OUTPUT = new File(baseFileName + FileExtensions.BAI_INDEX); - } - } - - IOUtil.assertFileIsWritable(OUTPUT); final SamReader bam; - - - bam = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE) + bam = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()) .disable(SamReaderFactory.Option.EAGERLY_DECODE) .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) - .open(SamInputResource.of(inputPath)); + .open(SamInputResource.of(INPUT.toPath())); + ValidationUtils.validateArg(bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate), + "Input bam file must be sorted by coordinates"); - if (bam.type() != SamReader.Type.BAM_TYPE && bam.type() != SamReader.Type.BAM_CSI_TYPE) { - throw new SAMException("Input file must be bam file, not sam file."); + // set default output file - .bai or .crai + if (OUTPUT == null) { + final String extension = INPUT.hasExtension(FileExtensions.BAM) ? FileExtensions.BAI_INDEX : FileExtensions.CRAM_INDEX; + OUTPUT = PicardHtsPath.replaceExtension(INPUT, extension); } - if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) { - throw new SAMException("Input bam file must be sorted by coordinate"); + if (bam.type() == SamReader.Type.BAM_TYPE || bam.type() == SamReader.Type.BAM_CSI_TYPE){ + BAMIndexer.createIndex(bam, OUTPUT.toPath()); + } else if (bam.type() == SamReader.Type.CRAM_TYPE){ + try (SeekableStream seekableStream = new SeekablePathStream(INPUT.toPath())){ + CRAMCRAIIndexer.writeIndex(seekableStream, Files.newOutputStream(OUTPUT.toPath())); + } catch (IOException e){ + throw new SAMException("Unable to write the CRAM Index " + OUTPUT); + } + } else { + throw new PicardException("Unsupported file type: " + INPUT); } - BAMIndexer.createIndex(bam, OUTPUT); - - log.info("Successfully wrote bam index file " + OUTPUT); + log.info("Successfully wrote bam index file " + OUTPUT); // tsato: bam -> SAM CloserUtil.close(bam); return 0; } diff --git a/src/main/java/picard/sam/SortSam.java b/src/main/java/picard/sam/SortSam.java index a423a802bf..7f495aca61 100644 --- a/src/main/java/picard/sam/SortSam.java +++ b/src/main/java/picard/sam/SortSam.java @@ -27,6 +27,7 @@ import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.util.CloserUtil; @@ -40,6 +41,8 @@ import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; +import picard.nio.PicardBucketUtils; +import picard.nio.PicardHtsPath; import java.io.File; @@ -101,10 +104,10 @@ public class SortSam extends CommandLineProgram { "
"; @Argument(doc = "The SAM, BAM or CRAM file to sort.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME) - public File INPUT; + public PicardHtsPath INPUT; @Argument(doc = "The sorted SAM, BAM or CRAM output file. ", shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME) - public File OUTPUT; + public PicardHtsPath OUTPUT; // note that SortOrder here is a local enum, not the SamFileHeader version. @Argument(shortName = StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, doc = "Sort order of output file. ") @@ -149,12 +152,15 @@ public String getHelpDoc() { protected int doWork() { - IOUtil.assertFileIsReadable(INPUT); - IOUtil.assertFileIsWritable(OUTPUT); - final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT); - ; + IOUtil.assertFileIsReadable(INPUT.toPath()); + if (INPUT.getScheme().equals(PicardBucketUtils.FILE_SCHEME)){ + IOUtil.assertFileIsWritable(OUTPUT.toPath()); + } + + final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).open(SamInputResource.of(INPUT.toPath())); // tsato: needs to be wrapped in SamInputResource.of() ?? + reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder()); - final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), false, OUTPUT, REFERENCE_SEQUENCE); + final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), false, OUTPUT.toPath(), referenceSequence.getReferencePath()); writer.setProgressLogger( new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection")); diff --git a/src/test/java/picard/sam/BuildBamIndexTest.java b/src/test/java/picard/sam/BuildBamIndexTest.java index 1c01f7567d..43d70044f8 100644 --- a/src/test/java/picard/sam/BuildBamIndexTest.java +++ b/src/test/java/picard/sam/BuildBamIndexTest.java @@ -1,72 +1,165 @@ package picard.sam; +import htsjdk.beta.io.IOPathUtils; +import htsjdk.io.IOPath; +import htsjdk.samtools.CRAMCRAIIndexer; import htsjdk.samtools.SAMException; -import org.apache.commons.io.FileUtils; +import htsjdk.samtools.cram.CRAIEntry; +import htsjdk.samtools.cram.CRAIIndex; import org.testng.Assert; -import org.testng.annotations.AfterTest; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; +import picard.nio.PicardBucketUtils; import picard.nio.PicardHtsPath; +import picard.nio.PicardIOUtils; import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; public class BuildBamIndexTest extends CommandLineProgramTest { private static final File TEST_DATA_DIR = new File("testdata/picard/indices/"); - private static final PicardHtsPath INPUT_FILE = new PicardHtsPath(new File(TEST_DATA_DIR, "index_test.sam").getPath()); - private static final File OUTPUT_SORTED_FILE = new File(TEST_DATA_DIR, "index_test_sorted.bam"); - private static final File OUTPUT_UNSORTED_FILE = new File(TEST_DATA_DIR, "/index_test_unsorted.bam"); - private static final File OUTPUT_INDEX_FILE = new File(TEST_DATA_DIR, "/index_test.bam.bai"); + private static final PicardHtsPath INPUT_UNSORTED_SAM = new PicardHtsPath(new File(TEST_DATA_DIR, "index_test.sam")); private static final File EXPECTED_BAI_FILE = new File(TEST_DATA_DIR, "index_test_b.bam.bai"); + private static final String CLOUD_TEST_DATA_DIR = "gs://hellbender/test/resources/picard/BuildBamIndex/"; + private static final String CLOUD_TEST_OUTPUT_DIR = "gs://hellbender-test-logs/staging/picard/test/BuildBamIndex/"; + // tsato: shouldn't we have a constructor for this? + private static final PicardHtsPath INPUT_UNSORTED_SAM_CLOUD = new PicardHtsPath(CLOUD_TEST_DATA_DIR, "index_test.sam"); + + // tsato: replace with variables defined in the other branches once they merge + private static final PicardHtsPath SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram"); + // tsato: this directory missing a crai... + private static final PicardHtsPath SORTED_CRAM = new PicardHtsPath("/Users/tsato/workspace/picard/testdata/picard/test/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram"); + private static final PicardHtsPath SORTED_CRAM2 = new PicardHtsPath("/Users/tsato/workspace/picard/testdata/picard/test/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram"); + + + public String getCommandLineProgramName() { return BuildBamIndex.class.getSimpleName(); } + @DataProvider(name = "buildBamIndexTestData") + public Object[][] getBuildBamIndexTestData(){ + return new Object[][]{ + {INPUT_UNSORTED_SAM, true}, + {INPUT_UNSORTED_SAM, false}, + {INPUT_UNSORTED_SAM_CLOUD, true}, + {INPUT_UNSORTED_SAM_CLOUD, false}}; + } + + // tsato: must add cram test... // Test that the index file for a sorted BAM is created - @Test - public void testBuildBamIndexOK() throws IOException { - final List args = new ArrayList<>(); - /* First sort, before indexing */ + @Test(dataProvider = "buildBamIndexTestData") + public void testBuildBamIndexOK(final PicardHtsPath inputUnsortedSam, final boolean specifyOutput) throws IOException { + final boolean cloud = ! inputUnsortedSam.isLocalPath(); + final String prefix = "index_test_sorted"; + final PicardHtsPath sortedBAM = cloud ? PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix,".bam") : + PicardBucketUtils.getTempFilePath(null, prefix, ".bam"); + + /* First sort, before indexing */ // tsato: do we need to do this dynamically? new SortSam().instanceMain(new String[]{ - "I=" + INPUT_FILE, - "O=" + OUTPUT_SORTED_FILE, + "I=" + inputUnsortedSam, + "O=" + sortedBAM, "SORT_ORDER=coordinate"}); - args.add("INPUT=" + OUTPUT_SORTED_FILE); - args.add("OUTPUT=" + OUTPUT_INDEX_FILE); + final List args = new ArrayList<>(); + args.add("INPUT=" + sortedBAM); + final PicardHtsPath indexOutput; + + if (specifyOutput) { + indexOutput = cloud ? PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".bai") : + PicardBucketUtils.getTempFilePath(null, prefix,".bai"); + args.add("OUTPUT=" + indexOutput); + } else { + indexOutput = PicardHtsPath.replaceExtension(sortedBAM, ".bai", false); + PicardIOUtils.deleteOnExit(indexOutput.toPath()); + } + runPicardCommandLine(args); - Assert.assertEquals(FileUtils.readFileToByteArray(OUTPUT_INDEX_FILE), FileUtils.readFileToByteArray(EXPECTED_BAI_FILE)); + Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(EXPECTED_BAI_FILE.toPath())); } // Test that the index creation fails when presented with a SAM file @Test(expectedExceptions = SAMException.class) public void testBuildSamIndexFail() { final List args = new ArrayList<>(); - args.add("INPUT=" + INPUT_FILE); - args.add("OUTPUT=" + OUTPUT_INDEX_FILE); + args.add("INPUT=" + INPUT_UNSORTED_SAM); runPicardCommandLine(args); } // Test that the index creation fails when presented with an unsorted BAM file @Test(expectedExceptions = SAMException.class) public void testBuildBamIndexFail() { - final List args = new ArrayList<>(); + final IOPath unsortedBAM = IOPathUtils.createTempPath("index_test_sorted", ".bam"); new SamFormatConverter().instanceMain(new String[]{ - "INPUT=" + INPUT_FILE, - "OUTPUT=" + OUTPUT_UNSORTED_FILE}); + "INPUT=" + INPUT_UNSORTED_SAM, + "OUTPUT=" + unsortedBAM}); - args.add("INPUT=" + OUTPUT_UNSORTED_FILE); - args.add("OUTPUT=" + OUTPUT_INDEX_FILE); + final List args = new ArrayList<>(); + args.add("INPUT=" + unsortedBAM); runPicardCommandLine(args); } - @AfterTest - public void cleanup() throws IOException { - FileUtils.forceDeleteOnExit(OUTPUT_INDEX_FILE); - FileUtils.forceDeleteOnExit(OUTPUT_SORTED_FILE); - FileUtils.forceDeleteOnExit(OUTPUT_UNSORTED_FILE); + @DataProvider(name = "cramTestData") + public Object[][] getCramTestData(){ + // tsato: replace with variable + final PicardHtsPath localRef = new PicardHtsPath("/Users/tsato/workspace/picard/testdata/picard/reference/human_g1k_v37.20.21.fasta"); + final PicardHtsPath cloudRef = new PicardHtsPath("gs://hellbender/test/resources/picard/references/human_g1k_v37.20.21.fasta"); + + return new Object[][]{ + {SORTED_CRAM, localRef}}; + // {SORTED_CRAM, cloudRef}}; // , + // {SORTED_CRAM_CLOUD, localRef}, // tsato: these are too slow, disable for now. + // {SORTED_CRAM_CLOUD, cloudRef}, + } + + @Test(dataProvider = "cramTestData") + public void testCram(final PicardHtsPath cram, final PicardHtsPath reference) throws IOException { + final List args = new ArrayList<>(); + args.add("INPUT=" + cram); + args.add("REFERENCE_SEQUENCE=" + reference); + + final PicardHtsPath indexOutput; + final String prefix = "BuildBamIndex_cram_test"; + + final boolean specifyOutput = false; // for now + if (specifyOutput) { + indexOutput = PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".crai"); + args.add("OUTPUT=" + indexOutput); + } else { + indexOutput = PicardHtsPath.replaceExtension(cram, ".crai", false); + // tsato: temporarily disable while investigating cram index created this way + // PicardIOUtils.deleteOnExit(indexOutput.toPath()); + } + + runPicardCommandLine(args); + + final CRAIIndex craiIndex = CRAMCRAIIndexer.readIndex(Files.newInputStream(indexOutput.toPath())); + final List entries = craiIndex.getCRAIEntries(); + // Let's start with this + + // *** CORRECT ONES *** + final CRAIIndex craiIndex2 = CRAMCRAIIndexer.readIndex( + Files.newInputStream(new File("/Users/tsato/workspace/picard/testdata/picard/test/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram.crai.samtools_save").toPath())); + final List entries2 = craiIndex2.getCRAIEntries(); + // *** END CORRECT ONES *** + + Assert.assertEquals(entries, entries2); // better test but uses a premade samtools index + Assert.assertEquals(entries.size(), 2); // tsato: more crude test; + + Files.delete(indexOutput.toPath()); // tsato: temporary + } + + // From https://github.com/samtools/htsjdk/issues/1084 + @Test + public void testStdin() throws IOException { + final InputStream inputStream = Files.newInputStream(Paths.get("/dev/stdin/")); + inputStream.available(); } } diff --git a/testdata/picard/reference/human_g1k_v37.20.21.dict b/testdata/picard/reference/human_g1k_v37.20.21.dict new file mode 100644 index 0000000000..7421f79098 --- /dev/null +++ b/testdata/picard/reference/human_g1k_v37.20.21.dict @@ -0,0 +1,3 @@ +@HD VN:1.5 SO:unsorted +@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta