From df058b20c3b3643328c4a6cd0b07d46505286cb3 Mon Sep 17 00:00:00 2001 From: Takuto Sato Date: Sun, 23 Jun 2024 15:43:43 -0400 Subject: [PATCH] write cloud tests --- src/main/java/picard/nio/PicardHtsPath.java | 20 ++++ src/main/java/picard/sam/SortSam.java | 19 ++-- .../java/picard/sam/BuildBamIndexTest.java | 97 +++++++++++++------ 3 files changed, 97 insertions(+), 39 deletions(-) diff --git a/src/main/java/picard/nio/PicardHtsPath.java b/src/main/java/picard/nio/PicardHtsPath.java index 3c22025088..b17f1fb48c 100644 --- a/src/main/java/picard/nio/PicardHtsPath.java +++ b/src/main/java/picard/nio/PicardHtsPath.java @@ -62,6 +62,10 @@ public PicardHtsPath(final String rawInputString) { super(rawInputString); } + public PicardHtsPath(final String directory, final String file) { + super(directory + file); + } + /** * Create a PicardHtsPath from an existing {@link HtsPath} or subclass. * @@ -179,10 +183,26 @@ public static PicardHtsPath replaceExtension(final IOPath path, final String new return PicardHtsPath.fromPath(path.toPath().resolveSibling(newFileName)); } + public static PicardHtsPath replaceExtension(final IOPath path, final String newExtension){ + return replaceExtension(path, newExtension, false); + } + + /** * Wrapper for Path.resolve() */ public static PicardHtsPath resolve(final PicardHtsPath absPath, final String relativePath){ return PicardHtsPath.fromPath(absPath.toPath().resolve(relativePath)); } + + /** + * Wrapper for Path.resolveSibling() + */ + public static PicardHtsPath resolveSibling(final PicardHtsPath absPath, final String other){ + return PicardHtsPath.fromPath(absPath.toPath().resolveSibling(other)); + } + + public boolean isLocalPath(){ + return getScheme().equals(PicardBucketUtils.FILE_SCHEME); + } } diff --git a/src/main/java/picard/sam/SortSam.java b/src/main/java/picard/sam/SortSam.java index a423a802bf..eff8fc34e7 100644 --- a/src/main/java/picard/sam/SortSam.java +++ b/src/main/java/picard/sam/SortSam.java @@ -40,6 +40,8 @@ import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; +import picard.nio.PicardBucketUtils; +import picard.nio.PicardHtsPath; import java.io.File; @@ -101,10 +103,10 @@ public class SortSam extends CommandLineProgram { "
"; @Argument(doc = "The SAM, BAM or CRAM file to sort.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME) - public File INPUT; + public PicardHtsPath INPUT; @Argument(doc = "The sorted SAM, BAM or CRAM output file. ", shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME) - public File OUTPUT; + public PicardHtsPath OUTPUT; // note that SortOrder here is a local enum, not the SamFileHeader version. @Argument(shortName = StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, doc = "Sort order of output file. ") @@ -149,12 +151,15 @@ public String getHelpDoc() { protected int doWork() { - IOUtil.assertFileIsReadable(INPUT); - IOUtil.assertFileIsWritable(OUTPUT); - final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT); - ; + IOUtil.assertFileIsReadable(INPUT.toPath()); + if (INPUT.getScheme().equals(PicardBucketUtils.FILE_SCHEME)){ + IOUtil.assertFileIsWritable(OUTPUT.toPath()); + } + + final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).open(INPUT.toPath()); + reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder()); - final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), false, OUTPUT, REFERENCE_SEQUENCE); + final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), false, OUTPUT.toPath(), referenceSequence.getReferencePath()); writer.setProgressLogger( new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection")); diff --git a/src/test/java/picard/sam/BuildBamIndexTest.java b/src/test/java/picard/sam/BuildBamIndexTest.java index a9f1cc8504..586e8bf5ab 100644 --- a/src/test/java/picard/sam/BuildBamIndexTest.java +++ b/src/test/java/picard/sam/BuildBamIndexTest.java @@ -5,64 +5,75 @@ import htsjdk.samtools.SAMException; import org.apache.commons.io.FileUtils; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; +import picard.nio.PicardBucketUtils; import picard.nio.PicardHtsPath; +import picard.nio.PicardIOUtils; import java.io.File; import java.io.IOException; -import java.nio.file.Path; +import java.nio.file.Files; import java.util.ArrayList; import java.util.List; public class BuildBamIndexTest extends CommandLineProgramTest { private static final File TEST_DATA_DIR = new File("testdata/picard/indices/"); - private static final PicardHtsPath INPUT_UNSORTED_SAM = new PicardHtsPath(new File(TEST_DATA_DIR, "index_test.sam").getPath()); + private static final PicardHtsPath INPUT_UNSORTED_SAM = new PicardHtsPath(new File(TEST_DATA_DIR, "index_test.sam")); private static final File EXPECTED_BAI_FILE = new File(TEST_DATA_DIR, "index_test_b.bam.bai"); + private static final String CLOUD_TEST_DATA_DIR = "gs://hellbender/test/resources/picard/BuildBamIndex/"; + private static final String CLOUD_TEST_OUTPUT_DIR = "gs://hellbender-test-logs/staging/picard/test/BuildBamIndex/"; + // tsato: shouldn't we have a constructor for this? + private static final PicardHtsPath INPUT_UNSORTED_SAM_CLOUD = new PicardHtsPath(CLOUD_TEST_DATA_DIR, "index_test.sam"); + + // tsato: replace with variables defined in the other branches once they merge + private static final PicardHtsPath CLOUD_SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); + public String getCommandLineProgramName() { return BuildBamIndex.class.getSimpleName(); } + @DataProvider(name = "buildBamIndexTestData") + public Object[][] getBuildBamIndexTestData(){ + return new Object[][]{ + {INPUT_UNSORTED_SAM, true}, + {INPUT_UNSORTED_SAM, false}, + {INPUT_UNSORTED_SAM_CLOUD, true}, + {INPUT_UNSORTED_SAM_CLOUD, false}}; + } + + // tsato: must add cram test... + // Test that the index file for a sorted BAM is created - @Test - public void testBuildBamIndexOK() throws IOException { - final IOPath sortedBAM = IOPathUtils.createTempPath("index_test_sorted", ".bam"); - // don't create the output index file, but mark it for deletion - final Path indexOutput = sortedBAM.toPath().resolveSibling("index_test.bam.bai"); - indexOutput.toFile().deleteOnExit(); - - /* First sort, before indexing */ + @Test(dataProvider = "buildBamIndexTestData") + public void testBuildBamIndexOK(final PicardHtsPath inputUnsortedSam, final boolean specifyOutput) throws IOException { + final boolean cloud = ! inputUnsortedSam.isLocalPath(); + final String prefix = "index_test_sorted"; + final PicardHtsPath sortedBAM = cloud ? PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix,".bam") : + PicardBucketUtils.getTempFilePath(null, prefix, ".bam"); + + /* First sort, before indexing */ // tsato: do we need to do this dynamically? new SortSam().instanceMain(new String[]{ - "I=" + INPUT_UNSORTED_SAM, + "I=" + inputUnsortedSam, "O=" + sortedBAM, "SORT_ORDER=coordinate"}); final List args = new ArrayList<>(); args.add("INPUT=" + sortedBAM); - args.add("OUTPUT=" + indexOutput); - runPicardCommandLine(args); - Assert.assertEquals(FileUtils.readFileToByteArray(indexOutput.toFile()), FileUtils.readFileToByteArray(EXPECTED_BAI_FILE)); - } + final PicardHtsPath indexOutput; - // Test that the index file for a sorted BAM is created in the right place if no output file is specified - @Test - public void testBuildBamIndexDefaultOutput() throws IOException { - final IOPath sortedBAM = IOPathUtils.createTempPath("index_test_sorted", ".bam"); - /* First sort, before indexing */ - new SortSam().instanceMain(new String[]{ - "I=" + INPUT_UNSORTED_SAM, - "O=" + sortedBAM, - "SORT_ORDER=coordinate"}); - - // don't create the output index file, but construct the expected name, and mark it for deletion - final String expectedIndexFileName = sortedBAM.getURIString().replace(".bam", ".bai"); - final IOPath indexOutput = new PicardHtsPath(expectedIndexFileName); - indexOutput.toPath().toFile().deleteOnExit(); + if (specifyOutput) { + indexOutput = cloud ? PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".bai") : + PicardBucketUtils.getTempFilePath(null, prefix,".bai"); + args.add("OUTPUT=" + indexOutput); + } else { + indexOutput = PicardHtsPath.replaceExtension(sortedBAM, ".bai", false); + PicardIOUtils.deleteOnExit(indexOutput.toPath()); + } - final List args = new ArrayList<>(); - args.add("INPUT=" + sortedBAM); runPicardCommandLine(args); - Assert.assertEquals(FileUtils.readFileToByteArray(indexOutput.toPath().toFile()), FileUtils.readFileToByteArray(EXPECTED_BAI_FILE)); + Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(EXPECTED_BAI_FILE.toPath())); } // Test that the index creation fails when presented with a SAM file @@ -86,4 +97,26 @@ public void testBuildBamIndexFail() { runPicardCommandLine(args); } + @Test() + public void testCloudCram() throws IOException { + final List args = new ArrayList<>(); + args.add("INPUT=" + CLOUD_SORTED_CRAM_CLOUD); + args.add("REFERENCE_SEQUENCE=" + "gs://hellbender/test/resources/picard/references/human_g1k_v37.20.21.fasta"); // tsato: replace with variable + + final boolean specifyOutput = true; // for now + final PicardHtsPath indexOutput; + final String prefix = "BuildBamIndex_cram_test"; + if (specifyOutput) { + indexOutput = PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".crai"); + args.add("OUTPUT=" + indexOutput); + } else { + indexOutput = PicardHtsPath.replaceExtension(CLOUD_SORTED_CRAM_CLOUD, ".crai", false); + PicardIOUtils.deleteOnExit(indexOutput.toPath()); + } + + runPicardCommandLine(args); + // tsato: gotta fix the suffix of this file... + final PicardHtsPath expectedCramIndex = new PicardHtsPath("gs://hellbender/test/resources/picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram.bai"); + Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(expectedCramIndex.toPath())); + } }