From c1929ab7456acf0ecde5166cd16d18b2e77e9bbc Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Thu, 29 Sep 2022 10:57:49 -0400 Subject: [PATCH 1/6] Cloud-enable BuildBamIndex outputs. --- src/main/java/picard/sam/BuildBamIndex.java | 15 ++--- .../java/picard/sam/BuildBamIndexTest.java | 65 ++++++++++++------- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/main/java/picard/sam/BuildBamIndex.java b/src/main/java/picard/sam/BuildBamIndex.java index 5141688328..89f6ed4100 100755 --- a/src/main/java/picard/sam/BuildBamIndex.java +++ b/src/main/java/picard/sam/BuildBamIndex.java @@ -75,7 +75,7 @@ public class BuildBamIndex extends CommandLineProgram { @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The BAM index file. Defaults to x.bai if INPUT is x.bam, otherwise INPUT.bai.\n" + "If INPUT is a URL and OUTPUT is unspecified, defaults to a file in the current directory.", optional = true) - public File OUTPUT; + public PicardHtsPath OUTPUT; /** * Main method for the program. Checks that all input files are present and @@ -91,20 +91,17 @@ protected int doWork() { final String baseFileName = inputPath.getFileName().toString(); // only BAI indices can be created for now, although CSI indices can be read as well - if (baseFileName.endsWith(FileExtensions.BAM)) { + if (INPUT.hasExtension(FileExtensions.BAM)) { final int index = baseFileName.lastIndexOf('.'); - OUTPUT = new File(baseFileName.substring(0, index) + FileExtensions.BAI_INDEX); - + final String outputIndexFileName = baseFileName.substring(0, index) + FileExtensions.BAI_INDEX; + OUTPUT = new PicardHtsPath(inputPath.resolveSibling(outputIndexFileName).toUri().toString()); } else { - OUTPUT = new File(baseFileName + FileExtensions.BAI_INDEX); + OUTPUT = new PicardHtsPath(inputPath.resolveSibling(baseFileName + FileExtensions.BAI_INDEX).toUri().toString()); } } - IOUtil.assertFileIsWritable(OUTPUT); final SamReader bam; - - bam = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE) .disable(SamReaderFactory.Option.EAGERLY_DECODE) .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) @@ -118,7 +115,7 @@ protected int doWork() { throw new SAMException("Input bam file must be sorted by coordinate"); } - BAMIndexer.createIndex(bam, OUTPUT); + BAMIndexer.createIndex(bam, OUTPUT.toPath()); log.info("Successfully wrote bam index file " + OUTPUT); CloserUtil.close(bam); diff --git a/src/test/java/picard/sam/BuildBamIndexTest.java b/src/test/java/picard/sam/BuildBamIndexTest.java index 1c01f7567d..a9f1cc8504 100644 --- a/src/test/java/picard/sam/BuildBamIndexTest.java +++ b/src/test/java/picard/sam/BuildBamIndexTest.java @@ -1,72 +1,89 @@ package picard.sam; +import htsjdk.beta.io.IOPathUtils; +import htsjdk.io.IOPath; import htsjdk.samtools.SAMException; import org.apache.commons.io.FileUtils; import org.testng.Assert; -import org.testng.annotations.AfterTest; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.nio.PicardHtsPath; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.ArrayList; import java.util.List; public class BuildBamIndexTest extends CommandLineProgramTest { private static final File TEST_DATA_DIR = new File("testdata/picard/indices/"); - private static final PicardHtsPath INPUT_FILE = new PicardHtsPath(new File(TEST_DATA_DIR, "index_test.sam").getPath()); - private static final File OUTPUT_SORTED_FILE = new File(TEST_DATA_DIR, "index_test_sorted.bam"); - private static final File OUTPUT_UNSORTED_FILE = new File(TEST_DATA_DIR, "/index_test_unsorted.bam"); - private static final File OUTPUT_INDEX_FILE = new File(TEST_DATA_DIR, "/index_test.bam.bai"); + private static final PicardHtsPath INPUT_UNSORTED_SAM = new PicardHtsPath(new File(TEST_DATA_DIR, "index_test.sam").getPath()); private static final File EXPECTED_BAI_FILE = new File(TEST_DATA_DIR, "index_test_b.bam.bai"); public String getCommandLineProgramName() { return BuildBamIndex.class.getSimpleName(); } - // Test that the index file for a sorted BAM is created @Test public void testBuildBamIndexOK() throws IOException { + final IOPath sortedBAM = IOPathUtils.createTempPath("index_test_sorted", ".bam"); + // don't create the output index file, but mark it for deletion + final Path indexOutput = sortedBAM.toPath().resolveSibling("index_test.bam.bai"); + indexOutput.toFile().deleteOnExit(); + + /* First sort, before indexing */ + new SortSam().instanceMain(new String[]{ + "I=" + INPUT_UNSORTED_SAM, + "O=" + sortedBAM, + "SORT_ORDER=coordinate"}); + final List args = new ArrayList<>(); + args.add("INPUT=" + sortedBAM); + args.add("OUTPUT=" + indexOutput); + runPicardCommandLine(args); + Assert.assertEquals(FileUtils.readFileToByteArray(indexOutput.toFile()), FileUtils.readFileToByteArray(EXPECTED_BAI_FILE)); + } + + // Test that the index file for a sorted BAM is created in the right place if no output file is specified + @Test + public void testBuildBamIndexDefaultOutput() throws IOException { + final IOPath sortedBAM = IOPathUtils.createTempPath("index_test_sorted", ".bam"); /* First sort, before indexing */ new SortSam().instanceMain(new String[]{ - "I=" + INPUT_FILE, - "O=" + OUTPUT_SORTED_FILE, + "I=" + INPUT_UNSORTED_SAM, + "O=" + sortedBAM, "SORT_ORDER=coordinate"}); - args.add("INPUT=" + OUTPUT_SORTED_FILE); - args.add("OUTPUT=" + OUTPUT_INDEX_FILE); + // don't create the output index file, but construct the expected name, and mark it for deletion + final String expectedIndexFileName = sortedBAM.getURIString().replace(".bam", ".bai"); + final IOPath indexOutput = new PicardHtsPath(expectedIndexFileName); + indexOutput.toPath().toFile().deleteOnExit(); + + final List args = new ArrayList<>(); + args.add("INPUT=" + sortedBAM); runPicardCommandLine(args); - Assert.assertEquals(FileUtils.readFileToByteArray(OUTPUT_INDEX_FILE), FileUtils.readFileToByteArray(EXPECTED_BAI_FILE)); + Assert.assertEquals(FileUtils.readFileToByteArray(indexOutput.toPath().toFile()), FileUtils.readFileToByteArray(EXPECTED_BAI_FILE)); } // Test that the index creation fails when presented with a SAM file @Test(expectedExceptions = SAMException.class) public void testBuildSamIndexFail() { final List args = new ArrayList<>(); - args.add("INPUT=" + INPUT_FILE); - args.add("OUTPUT=" + OUTPUT_INDEX_FILE); + args.add("INPUT=" + INPUT_UNSORTED_SAM); runPicardCommandLine(args); } // Test that the index creation fails when presented with an unsorted BAM file @Test(expectedExceptions = SAMException.class) public void testBuildBamIndexFail() { - final List args = new ArrayList<>(); + final IOPath unsortedBAM = IOPathUtils.createTempPath("index_test_sorted", ".bam"); new SamFormatConverter().instanceMain(new String[]{ - "INPUT=" + INPUT_FILE, - "OUTPUT=" + OUTPUT_UNSORTED_FILE}); + "INPUT=" + INPUT_UNSORTED_SAM, + "OUTPUT=" + unsortedBAM}); - args.add("INPUT=" + OUTPUT_UNSORTED_FILE); - args.add("OUTPUT=" + OUTPUT_INDEX_FILE); + final List args = new ArrayList<>(); + args.add("INPUT=" + unsortedBAM); runPicardCommandLine(args); } - @AfterTest - public void cleanup() throws IOException { - FileUtils.forceDeleteOnExit(OUTPUT_INDEX_FILE); - FileUtils.forceDeleteOnExit(OUTPUT_SORTED_FILE); - FileUtils.forceDeleteOnExit(OUTPUT_UNSORTED_FILE); - } } From df058b20c3b3643328c4a6cd0b07d46505286cb3 Mon Sep 17 00:00:00 2001 From: Takuto Sato Date: Sun, 23 Jun 2024 15:43:43 -0400 Subject: [PATCH 2/6] write cloud tests --- src/main/java/picard/nio/PicardHtsPath.java | 20 ++++ src/main/java/picard/sam/SortSam.java | 19 ++-- .../java/picard/sam/BuildBamIndexTest.java | 97 +++++++++++++------ 3 files changed, 97 insertions(+), 39 deletions(-) diff --git a/src/main/java/picard/nio/PicardHtsPath.java b/src/main/java/picard/nio/PicardHtsPath.java index 3c22025088..b17f1fb48c 100644 --- a/src/main/java/picard/nio/PicardHtsPath.java +++ b/src/main/java/picard/nio/PicardHtsPath.java @@ -62,6 +62,10 @@ public PicardHtsPath(final String rawInputString) { super(rawInputString); } + public PicardHtsPath(final String directory, final String file) { + super(directory + file); + } + /** * Create a PicardHtsPath from an existing {@link HtsPath} or subclass. * @@ -179,10 +183,26 @@ public static PicardHtsPath replaceExtension(final IOPath path, final String new return PicardHtsPath.fromPath(path.toPath().resolveSibling(newFileName)); } + public static PicardHtsPath replaceExtension(final IOPath path, final String newExtension){ + return replaceExtension(path, newExtension, false); + } + + /** * Wrapper for Path.resolve() */ public static PicardHtsPath resolve(final PicardHtsPath absPath, final String relativePath){ return PicardHtsPath.fromPath(absPath.toPath().resolve(relativePath)); } + + /** + * Wrapper for Path.resolveSibling() + */ + public static PicardHtsPath resolveSibling(final PicardHtsPath absPath, final String other){ + return PicardHtsPath.fromPath(absPath.toPath().resolveSibling(other)); + } + + public boolean isLocalPath(){ + return getScheme().equals(PicardBucketUtils.FILE_SCHEME); + } } diff --git a/src/main/java/picard/sam/SortSam.java b/src/main/java/picard/sam/SortSam.java index a423a802bf..eff8fc34e7 100644 --- a/src/main/java/picard/sam/SortSam.java +++ b/src/main/java/picard/sam/SortSam.java @@ -40,6 +40,8 @@ import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; +import picard.nio.PicardBucketUtils; +import picard.nio.PicardHtsPath; import java.io.File; @@ -101,10 +103,10 @@ public class SortSam extends CommandLineProgram { "
"; @Argument(doc = "The SAM, BAM or CRAM file to sort.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME) - public File INPUT; + public PicardHtsPath INPUT; @Argument(doc = "The sorted SAM, BAM or CRAM output file. ", shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME) - public File OUTPUT; + public PicardHtsPath OUTPUT; // note that SortOrder here is a local enum, not the SamFileHeader version. @Argument(shortName = StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, doc = "Sort order of output file. ") @@ -149,12 +151,15 @@ public String getHelpDoc() { protected int doWork() { - IOUtil.assertFileIsReadable(INPUT); - IOUtil.assertFileIsWritable(OUTPUT); - final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT); - ; + IOUtil.assertFileIsReadable(INPUT.toPath()); + if (INPUT.getScheme().equals(PicardBucketUtils.FILE_SCHEME)){ + IOUtil.assertFileIsWritable(OUTPUT.toPath()); + } + + final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).open(INPUT.toPath()); + reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder()); - final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), false, OUTPUT, REFERENCE_SEQUENCE); + final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), false, OUTPUT.toPath(), referenceSequence.getReferencePath()); writer.setProgressLogger( new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection")); diff --git a/src/test/java/picard/sam/BuildBamIndexTest.java b/src/test/java/picard/sam/BuildBamIndexTest.java index a9f1cc8504..586e8bf5ab 100644 --- a/src/test/java/picard/sam/BuildBamIndexTest.java +++ b/src/test/java/picard/sam/BuildBamIndexTest.java @@ -5,64 +5,75 @@ import htsjdk.samtools.SAMException; import org.apache.commons.io.FileUtils; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; +import picard.nio.PicardBucketUtils; import picard.nio.PicardHtsPath; +import picard.nio.PicardIOUtils; import java.io.File; import java.io.IOException; -import java.nio.file.Path; +import java.nio.file.Files; import java.util.ArrayList; import java.util.List; public class BuildBamIndexTest extends CommandLineProgramTest { private static final File TEST_DATA_DIR = new File("testdata/picard/indices/"); - private static final PicardHtsPath INPUT_UNSORTED_SAM = new PicardHtsPath(new File(TEST_DATA_DIR, "index_test.sam").getPath()); + private static final PicardHtsPath INPUT_UNSORTED_SAM = new PicardHtsPath(new File(TEST_DATA_DIR, "index_test.sam")); private static final File EXPECTED_BAI_FILE = new File(TEST_DATA_DIR, "index_test_b.bam.bai"); + private static final String CLOUD_TEST_DATA_DIR = "gs://hellbender/test/resources/picard/BuildBamIndex/"; + private static final String CLOUD_TEST_OUTPUT_DIR = "gs://hellbender-test-logs/staging/picard/test/BuildBamIndex/"; + // tsato: shouldn't we have a constructor for this? + private static final PicardHtsPath INPUT_UNSORTED_SAM_CLOUD = new PicardHtsPath(CLOUD_TEST_DATA_DIR, "index_test.sam"); + + // tsato: replace with variables defined in the other branches once they merge + private static final PicardHtsPath CLOUD_SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); + public String getCommandLineProgramName() { return BuildBamIndex.class.getSimpleName(); } + @DataProvider(name = "buildBamIndexTestData") + public Object[][] getBuildBamIndexTestData(){ + return new Object[][]{ + {INPUT_UNSORTED_SAM, true}, + {INPUT_UNSORTED_SAM, false}, + {INPUT_UNSORTED_SAM_CLOUD, true}, + {INPUT_UNSORTED_SAM_CLOUD, false}}; + } + + // tsato: must add cram test... + // Test that the index file for a sorted BAM is created - @Test - public void testBuildBamIndexOK() throws IOException { - final IOPath sortedBAM = IOPathUtils.createTempPath("index_test_sorted", ".bam"); - // don't create the output index file, but mark it for deletion - final Path indexOutput = sortedBAM.toPath().resolveSibling("index_test.bam.bai"); - indexOutput.toFile().deleteOnExit(); - - /* First sort, before indexing */ + @Test(dataProvider = "buildBamIndexTestData") + public void testBuildBamIndexOK(final PicardHtsPath inputUnsortedSam, final boolean specifyOutput) throws IOException { + final boolean cloud = ! inputUnsortedSam.isLocalPath(); + final String prefix = "index_test_sorted"; + final PicardHtsPath sortedBAM = cloud ? PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix,".bam") : + PicardBucketUtils.getTempFilePath(null, prefix, ".bam"); + + /* First sort, before indexing */ // tsato: do we need to do this dynamically? new SortSam().instanceMain(new String[]{ - "I=" + INPUT_UNSORTED_SAM, + "I=" + inputUnsortedSam, "O=" + sortedBAM, "SORT_ORDER=coordinate"}); final List args = new ArrayList<>(); args.add("INPUT=" + sortedBAM); - args.add("OUTPUT=" + indexOutput); - runPicardCommandLine(args); - Assert.assertEquals(FileUtils.readFileToByteArray(indexOutput.toFile()), FileUtils.readFileToByteArray(EXPECTED_BAI_FILE)); - } + final PicardHtsPath indexOutput; - // Test that the index file for a sorted BAM is created in the right place if no output file is specified - @Test - public void testBuildBamIndexDefaultOutput() throws IOException { - final IOPath sortedBAM = IOPathUtils.createTempPath("index_test_sorted", ".bam"); - /* First sort, before indexing */ - new SortSam().instanceMain(new String[]{ - "I=" + INPUT_UNSORTED_SAM, - "O=" + sortedBAM, - "SORT_ORDER=coordinate"}); - - // don't create the output index file, but construct the expected name, and mark it for deletion - final String expectedIndexFileName = sortedBAM.getURIString().replace(".bam", ".bai"); - final IOPath indexOutput = new PicardHtsPath(expectedIndexFileName); - indexOutput.toPath().toFile().deleteOnExit(); + if (specifyOutput) { + indexOutput = cloud ? PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".bai") : + PicardBucketUtils.getTempFilePath(null, prefix,".bai"); + args.add("OUTPUT=" + indexOutput); + } else { + indexOutput = PicardHtsPath.replaceExtension(sortedBAM, ".bai", false); + PicardIOUtils.deleteOnExit(indexOutput.toPath()); + } - final List args = new ArrayList<>(); - args.add("INPUT=" + sortedBAM); runPicardCommandLine(args); - Assert.assertEquals(FileUtils.readFileToByteArray(indexOutput.toPath().toFile()), FileUtils.readFileToByteArray(EXPECTED_BAI_FILE)); + Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(EXPECTED_BAI_FILE.toPath())); } // Test that the index creation fails when presented with a SAM file @@ -86,4 +97,26 @@ public void testBuildBamIndexFail() { runPicardCommandLine(args); } + @Test() + public void testCloudCram() throws IOException { + final List args = new ArrayList<>(); + args.add("INPUT=" + CLOUD_SORTED_CRAM_CLOUD); + args.add("REFERENCE_SEQUENCE=" + "gs://hellbender/test/resources/picard/references/human_g1k_v37.20.21.fasta"); // tsato: replace with variable + + final boolean specifyOutput = true; // for now + final PicardHtsPath indexOutput; + final String prefix = "BuildBamIndex_cram_test"; + if (specifyOutput) { + indexOutput = PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".crai"); + args.add("OUTPUT=" + indexOutput); + } else { + indexOutput = PicardHtsPath.replaceExtension(CLOUD_SORTED_CRAM_CLOUD, ".crai", false); + PicardIOUtils.deleteOnExit(indexOutput.toPath()); + } + + runPicardCommandLine(args); + // tsato: gotta fix the suffix of this file... + final PicardHtsPath expectedCramIndex = new PicardHtsPath("gs://hellbender/test/resources/picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram.bai"); + Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(expectedCramIndex.toPath())); + } } From 07aea54fa0b72fcb03611b7fbe2bb5016720a3dd Mon Sep 17 00:00:00 2001 From: Takuto Sato Date: Tue, 25 Jun 2024 14:56:33 -0400 Subject: [PATCH 3/6] attempt to support cram --- src/main/java/picard/sam/BuildBamIndex.java | 57 ++++++++++--------- .../java/picard/sam/BuildBamIndexTest.java | 20 ++++--- .../picard/reference/human_g1k_v37.20.21.dict | 3 + 3 files changed, 44 insertions(+), 36 deletions(-) create mode 100644 testdata/picard/reference/human_g1k_v37.20.21.dict diff --git a/src/main/java/picard/sam/BuildBamIndex.java b/src/main/java/picard/sam/BuildBamIndex.java index 89f6ed4100..4a1224d675 100755 --- a/src/main/java/picard/sam/BuildBamIndex.java +++ b/src/main/java/picard/sam/BuildBamIndex.java @@ -25,24 +25,32 @@ package picard.sam; import htsjdk.samtools.BAMIndexer; +import htsjdk.samtools.CRAMCRAIIndexer; +import htsjdk.samtools.CRAMIndexer; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.seekablestream.SeekablePathStream; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; +import htsjdk.utils.ValidationUtils; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; +import picard.PicardException; import picard.cmdline.CommandLineProgram; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; import picard.nio.PicardHtsPath; import java.io.File; +import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; /** @@ -69,7 +77,7 @@ public class BuildBamIndex extends CommandLineProgram { private static final Log log = Log.getInstance(BuildBamIndex.class); @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, - doc = "A BAM file or GA4GH URL to process. Must be sorted in coordinate order.") + doc = "A BAM file or GA4GH URL to process. Must be sorted in coordinate order.") // tsato: Is the GA4GH bit accurate? public PicardHtsPath INPUT; @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, @@ -83,41 +91,36 @@ public class BuildBamIndex extends CommandLineProgram { * all the records generating a BAM Index, then writes the bai file. */ protected int doWork() { - final Path inputPath = INPUT.toPath(); - - // set default output file - input-file.bai - if (OUTPUT == null) { - - final String baseFileName = inputPath.getFileName().toString(); - - // only BAI indices can be created for now, although CSI indices can be read as well - if (INPUT.hasExtension(FileExtensions.BAM)) { - - final int index = baseFileName.lastIndexOf('.'); - final String outputIndexFileName = baseFileName.substring(0, index) + FileExtensions.BAI_INDEX; - OUTPUT = new PicardHtsPath(inputPath.resolveSibling(outputIndexFileName).toUri().toString()); - } else { - OUTPUT = new PicardHtsPath(inputPath.resolveSibling(baseFileName + FileExtensions.BAI_INDEX).toUri().toString()); - } - } + ValidationUtils.validateArg(INPUT.hasExtension(FileExtensions.BAM) || INPUT.hasExtension(FileExtensions.CRAM), + "only BAM and CRAM files are supported. INPUT = " + INPUT); final SamReader bam; - bam = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE) + bam = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()) .disable(SamReaderFactory.Option.EAGERLY_DECODE) .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) - .open(SamInputResource.of(inputPath)); + .open(SamInputResource.of(INPUT.toPath())); + ValidationUtils.validateArg(bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate), + "Input bam file must be sorted by coordinates"); - if (bam.type() != SamReader.Type.BAM_TYPE && bam.type() != SamReader.Type.BAM_CSI_TYPE) { - throw new SAMException("Input file must be bam file, not sam file."); + // set default output file - .bai or .crai + if (OUTPUT == null) { + final String extension = INPUT.hasExtension(FileExtensions.BAM) ? FileExtensions.BAI_INDEX : FileExtensions.CRAM_INDEX; + OUTPUT = PicardHtsPath.replaceExtension(INPUT, extension); } - if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) { - throw new SAMException("Input bam file must be sorted by coordinate"); + if (bam.type() == SamReader.Type.BAM_TYPE || bam.type() == SamReader.Type.BAM_CSI_TYPE){ + BAMIndexer.createIndex(bam, OUTPUT.toPath()); + } else if (bam.type() == SamReader.Type.CRAM_TYPE){ + try (SeekableStream seekableStream = new SeekablePathStream(INPUT.toPath())){ + CRAMCRAIIndexer.writeIndex(seekableStream, Files.newOutputStream(OUTPUT.toPath())); + } catch (IOException e){ + throw new SAMException("Unable to write the CRAM Index " + OUTPUT); + } + } else { + throw new PicardException("Unsupported file type: " + INPUT); } - BAMIndexer.createIndex(bam, OUTPUT.toPath()); - - log.info("Successfully wrote bam index file " + OUTPUT); + log.info("Successfully wrote bam index file " + OUTPUT); // tsato: bam -> SAM CloserUtil.close(bam); return 0; } diff --git a/src/test/java/picard/sam/BuildBamIndexTest.java b/src/test/java/picard/sam/BuildBamIndexTest.java index 586e8bf5ab..1e07b63a86 100644 --- a/src/test/java/picard/sam/BuildBamIndexTest.java +++ b/src/test/java/picard/sam/BuildBamIndexTest.java @@ -3,7 +3,6 @@ import htsjdk.beta.io.IOPathUtils; import htsjdk.io.IOPath; import htsjdk.samtools.SAMException; -import org.apache.commons.io.FileUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -30,7 +29,7 @@ public class BuildBamIndexTest extends CommandLineProgramTest { private static final PicardHtsPath INPUT_UNSORTED_SAM_CLOUD = new PicardHtsPath(CLOUD_TEST_DATA_DIR, "index_test.sam"); // tsato: replace with variables defined in the other branches once they merge - private static final PicardHtsPath CLOUD_SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); + private static final PicardHtsPath SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); public String getCommandLineProgramName() { return BuildBamIndex.class.getSimpleName(); } @@ -100,23 +99,26 @@ public void testBuildBamIndexFail() { @Test() public void testCloudCram() throws IOException { final List args = new ArrayList<>(); - args.add("INPUT=" + CLOUD_SORTED_CRAM_CLOUD); + args.add("INPUT=" + SORTED_CRAM_CLOUD); args.add("REFERENCE_SEQUENCE=" + "gs://hellbender/test/resources/picard/references/human_g1k_v37.20.21.fasta"); // tsato: replace with variable - final boolean specifyOutput = true; // for now + final boolean specifyOutput = false; // for now + final PicardHtsPath indexOutput; final String prefix = "BuildBamIndex_cram_test"; if (specifyOutput) { indexOutput = PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".crai"); args.add("OUTPUT=" + indexOutput); } else { - indexOutput = PicardHtsPath.replaceExtension(CLOUD_SORTED_CRAM_CLOUD, ".crai", false); - PicardIOUtils.deleteOnExit(indexOutput.toPath()); + indexOutput = PicardHtsPath.replaceExtension(SORTED_CRAM_CLOUD, ".crai", false); + // tsato: temporarily disable while investigating cram index created this way + // PicardIOUtils.deleteOnExit(indexOutput.toPath()); } runPicardCommandLine(args); - // tsato: gotta fix the suffix of this file... - final PicardHtsPath expectedCramIndex = new PicardHtsPath("gs://hellbender/test/resources/picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram.bai"); - Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(expectedCramIndex.toPath())); + // tsato: gotta fix the suffix of this file...or not? + final PicardHtsPath expectedCramIndex = new PicardHtsPath("gs://hellbender/test/resources/picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram.bai"); + int d = 3; + // Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(new File("/Users/tsato/workspace/picard/testdata/picard/test.index").toPath())); } } diff --git a/testdata/picard/reference/human_g1k_v37.20.21.dict b/testdata/picard/reference/human_g1k_v37.20.21.dict new file mode 100644 index 0000000000..7421f79098 --- /dev/null +++ b/testdata/picard/reference/human_g1k_v37.20.21.dict @@ -0,0 +1,3 @@ +@HD VN:1.5 SO:unsorted +@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta From fa01af6387e17bed648cd7a7c23252e9b23f360d Mon Sep 17 00:00:00 2001 From: Takuto Sato Date: Tue, 25 Jun 2024 16:25:06 -0400 Subject: [PATCH 4/6] cram tests --- .../java/picard/sam/BuildBamIndexTest.java | 56 +++++++++++++++---- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/src/test/java/picard/sam/BuildBamIndexTest.java b/src/test/java/picard/sam/BuildBamIndexTest.java index 1e07b63a86..c29116aaaa 100644 --- a/src/test/java/picard/sam/BuildBamIndexTest.java +++ b/src/test/java/picard/sam/BuildBamIndexTest.java @@ -2,7 +2,10 @@ import htsjdk.beta.io.IOPathUtils; import htsjdk.io.IOPath; +import htsjdk.samtools.CRAMCRAIIndexer; import htsjdk.samtools.SAMException; +import htsjdk.samtools.cram.CRAIEntry; +import htsjdk.samtools.cram.CRAIIndex; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -29,7 +32,12 @@ public class BuildBamIndexTest extends CommandLineProgramTest { private static final PicardHtsPath INPUT_UNSORTED_SAM_CLOUD = new PicardHtsPath(CLOUD_TEST_DATA_DIR, "index_test.sam"); // tsato: replace with variables defined in the other branches once they merge - private static final PicardHtsPath SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); + private static final PicardHtsPath SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram"); + // tsato: this directory missing a crai... + private static final PicardHtsPath SORTED_CRAM = new PicardHtsPath("/Users/tsato/workspace/picard/testdata/picard/test/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram"); + private static final PicardHtsPath SORTED_CRAM2 = new PicardHtsPath("/Users/tsato/workspace/picard/testdata/picard/test/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram"); + + public String getCommandLineProgramName() { return BuildBamIndex.class.getSimpleName(); } @@ -96,29 +104,53 @@ public void testBuildBamIndexFail() { runPicardCommandLine(args); } - @Test() - public void testCloudCram() throws IOException { - final List args = new ArrayList<>(); - args.add("INPUT=" + SORTED_CRAM_CLOUD); - args.add("REFERENCE_SEQUENCE=" + "gs://hellbender/test/resources/picard/references/human_g1k_v37.20.21.fasta"); // tsato: replace with variable + @DataProvider(name = "cramTestData") + public Object[][] getCramTestData(){ + // tsato: replace with variable + final PicardHtsPath localRef = new PicardHtsPath("/Users/tsato/workspace/picard/testdata/picard/reference/human_g1k_v37.20.21.fasta"); + final PicardHtsPath cloudRef = new PicardHtsPath("gs://hellbender/test/resources/picard/references/human_g1k_v37.20.21.fasta"); + + return new Object[][]{ + {SORTED_CRAM, localRef}}; + // {SORTED_CRAM, cloudRef}}; // , + // {SORTED_CRAM_CLOUD, localRef}, // tsato: these are too slow, disable for now. + // {SORTED_CRAM_CLOUD, cloudRef}, + } - final boolean specifyOutput = false; // for now + @Test(dataProvider = "cramTestData") + public void testCram(final PicardHtsPath cram, final PicardHtsPath reference) throws IOException { + final List args = new ArrayList<>(); + args.add("INPUT=" + cram); + args.add("REFERENCE_SEQUENCE=" + reference); final PicardHtsPath indexOutput; final String prefix = "BuildBamIndex_cram_test"; + + final boolean specifyOutput = false; // for now if (specifyOutput) { indexOutput = PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".crai"); args.add("OUTPUT=" + indexOutput); } else { - indexOutput = PicardHtsPath.replaceExtension(SORTED_CRAM_CLOUD, ".crai", false); + indexOutput = PicardHtsPath.replaceExtension(cram, ".crai", false); // tsato: temporarily disable while investigating cram index created this way // PicardIOUtils.deleteOnExit(indexOutput.toPath()); } runPicardCommandLine(args); - // tsato: gotta fix the suffix of this file...or not? - final PicardHtsPath expectedCramIndex = new PicardHtsPath("gs://hellbender/test/resources/picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram.bai"); - int d = 3; - // Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(new File("/Users/tsato/workspace/picard/testdata/picard/test.index").toPath())); + + final CRAIIndex craiIndex = CRAMCRAIIndexer.readIndex(Files.newInputStream(indexOutput.toPath())); + final List entries = craiIndex.getCRAIEntries(); + // Let's start with this + + // *** CORRECT ONES *** + final CRAIIndex craiIndex2 = CRAMCRAIIndexer.readIndex( + Files.newInputStream(new File("/Users/tsato/workspace/picard/testdata/picard/test/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram.crai.samtools_save").toPath())); + final List entries2 = craiIndex2.getCRAIEntries(); + // *** END CORRECT ONES *** + + Assert.assertEquals(entries, entries2); // better test but uses a premade samtools index + Assert.assertEquals(entries.size(), 2); // tsato: more crude test; + + Files.delete(indexOutput.toPath()); // tsato: temporary } } From 0f6381093e6861029ade1956a586fb9072b4b500 Mon Sep 17 00:00:00 2001 From: Takuto Sato Date: Tue, 9 Jul 2024 17:15:28 -0400 Subject: [PATCH 5/6] fix piped input issue --- src/main/java/picard/sam/SortSam.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/picard/sam/SortSam.java b/src/main/java/picard/sam/SortSam.java index eff8fc34e7..7f495aca61 100644 --- a/src/main/java/picard/sam/SortSam.java +++ b/src/main/java/picard/sam/SortSam.java @@ -27,6 +27,7 @@ import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.util.CloserUtil; @@ -156,7 +157,7 @@ protected int doWork() { IOUtil.assertFileIsWritable(OUTPUT.toPath()); } - final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).open(INPUT.toPath()); + final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).open(SamInputResource.of(INPUT.toPath())); // tsato: needs to be wrapped in SamInputResource.of() ?? reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder()); final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), false, OUTPUT.toPath(), referenceSequence.getReferencePath()); From 5902b18441e31e4bb928721b2d7b408821363b56 Mon Sep 17 00:00:00 2001 From: Takuto Sato Date: Thu, 11 Jul 2024 17:24:26 -0400 Subject: [PATCH 6/6] louis failing test --- src/test/java/picard/sam/BuildBamIndexTest.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/test/java/picard/sam/BuildBamIndexTest.java b/src/test/java/picard/sam/BuildBamIndexTest.java index c29116aaaa..43d70044f8 100644 --- a/src/test/java/picard/sam/BuildBamIndexTest.java +++ b/src/test/java/picard/sam/BuildBamIndexTest.java @@ -16,7 +16,9 @@ import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; @@ -153,4 +155,11 @@ public void testCram(final PicardHtsPath cram, final PicardHtsPath reference) th Files.delete(indexOutput.toPath()); // tsato: temporary } + + // From https://github.com/samtools/htsjdk/issues/1084 + @Test + public void testStdin() throws IOException { + final InputStream inputStream = Files.newInputStream(Paths.get("/dev/stdin/")); + inputStream.available(); + } }