Skip to content

Commit

Permalink
test passing for now
Browse files Browse the repository at this point in the history
  • Loading branch information
takutosato committed Jul 20, 2023
1 parent 64f3518 commit 36bc7d1
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,30 @@
*/
public interface ReferenceArgumentCollection {
/**
* @return The reference provided by the user, if any, or the default, if any, as a File. May be null.
*
* tsato: a better name would be getReferenceAsFile
* @return The reference provided by the user or the default as a File. May be null.
*/
File getReferenceFile();
File getReferenceFile(); // tsato: need to aggressively elimnate all code instances where this method is called

/**
* @return The reference provided by the user, if any, or the default, if any, as an nio Path. May be null.
* @return The reference provided by the user or the default as an nio Path. May be null.
*/
default Path getReferencePath() { return getReferenceFile() == null ? null : getReferenceFile().toPath(); }
default Path getReferencePath(){
return getHtsPath().toPath(); // tsato: maybe we don't need this
}

/**
* Tools should access reference file through this method
* tsato: do we need getReferencePath then?
* tsato: what if the reference is not required? i.e. in subclasses other than requiredreferenceArgument...
*
* @return The reference provided by the user, if any, or the default, if any, as a PicardHtsPath. May be null.
*/
default PicardHtsPath getHtsPath() {
return getReferenceFile() == null ?
null :
new PicardHtsPath(getReferenceFile().getAbsolutePath()); } // tsato: this is the issue here...
default PicardHtsPath getHtsPath(){
// tsato: this is for compatibility with legacy code like CollectRRBSMetrics.
return new PicardHtsPath(getReferenceFile());
};

/**
* @return A "safe" way to obtain a File object for any reference path.
Expand All @@ -66,7 +74,7 @@ static File getFileSafe(final PicardHtsPath picardPath, final Log log) {
return null;
} else if (picardPath.getScheme().equals(PicardHtsPath.FILE_SCHEME)) {
// file on a local file system
return picardPath == null ? null : picardPath.toPath().toFile();
return picardPath.toPath().toFile();
} else {
log.warn(String.format(
"The reference specified by %s cannot be used as a local file object",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@
public class RequiredReferenceArgumentCollection implements ReferenceArgumentCollection {
private final static Log log = Log.getInstance(RequiredReferenceArgumentCollection.class);

@Argument(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence file.", common = false)
public PicardHtsPath REFERENCE_SEQUENCE;
@Argument(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence file.")
public PicardHtsPath REFERENCE_SEQUENCE; // tsato: is the only reason why we instantiate so that

/**
*
* To be called by legacy tools
* @return The reference provided by the user as a File. May be null.
*/
public File getReferenceFile() {
Expand Down
13 changes: 9 additions & 4 deletions src/main/java/picard/sam/CreateSequenceDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ private Iterable<SAMSequenceRecord> getSamSequenceRecordsIterable() {
*/
protected String[] customCommandLineValidation() {
if (URI == null) {
URI = "file:" + referenceSequence.getReferenceFile().getAbsolutePath();
URI = "file:" + referenceSequence.getHtsPath().getURIString();
}
if (OUTPUT == null) {
final Path outputPath = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(referenceSequence.getHtsPath().toPath());
Expand All @@ -194,20 +194,25 @@ protected String[] customCommandLineValidation() {
}

// return a custom argument collection because this tool uses the argument name
// "REFERENCE" instead of "REFERENCE_SEQUENCE"
// "REFERENCE" instead of "REFERENCE_SEQUENCE" // tsato: can we starndardize this?
@Override
protected ReferenceArgumentCollection makeReferenceArgumentCollection() {
return new CreateSeqDictReferenceArgumentCollection();
}

public static class CreateSeqDictReferenceArgumentCollection implements ReferenceArgumentCollection {
@Argument(doc = "Input reference fasta or fasta.gz", shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME)
public File REFERENCE;
public PicardHtsPath REFERENCE;

@Override
public File getReferenceFile() {
public PicardHtsPath getHtsPath() {
return REFERENCE;
}

@Override
public File getReferenceFile() {
return ReferenceArgumentCollection.getFileSafe(REFERENCE, logger);
}
}

/**
Expand Down
25 changes: 16 additions & 9 deletions src/main/java/picard/sam/DownsampleSam.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.argumentcollections.RequiredReferenceArgumentCollection;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import picard.nio.PicardHtsPath;

import java.io.File;
import java.text.DecimalFormat;
Expand Down Expand Up @@ -171,10 +173,10 @@ public class DownsampleSam extends CommandLineProgram {
" P=0.00001 \\\n" +
" ACCURACY=0.0000001\n";
@Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM or BAM file to downsample.")
public File INPUT;
public PicardHtsPath INPUT;

@Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output, downsampled, SAM, BAM or CRAM file to write.")
public File OUTPUT;
public PicardHtsPath OUTPUT;

@Argument(shortName="S", doc="The downsampling strategy to use. See usage for discussion.")
public Strategy STRATEGY = Strategy.ConstantMemory;
Expand Down Expand Up @@ -210,8 +212,8 @@ protected String[] customCommandLineValidation() {

@Override
protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
IOUtil.assertFileIsReadable(INPUT.toPath());
// IOUtil.assertFileIsWritable(OUTPUT); // tsato: writability check has to go

// Warn the user if they are running with P=1 or P=0 (which are legal, but odd)
if (PROBABILITY == 1) {
Expand All @@ -228,7 +230,7 @@ protected int doWork() {
"Drawing a random seed because RANDOM_SEED was not set. Set RANDOM_SEED to %s to reproduce these results in the future.", RANDOM_SEED));
}

final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SamInputResource.of(INPUT));
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SamInputResource.of(INPUT.toPath()));
final SAMFileHeader header = in.getFileHeader().clone();

if (STRATEGY == Strategy.ConstantMemory || STRATEGY == Strategy.Chained) {
Expand Down Expand Up @@ -266,7 +268,7 @@ protected int doWork() {
final SAMProgramRecord pgRecord = getPGRecord(header);
pgRecord.setAttribute(RANDOM_SEED_TAG, RANDOM_SEED.toString());
header.addProgramRecord(pgRecord);
final SAMFileWriter out = new SAMFileWriterFactory().makeWriter(header, true, OUTPUT, referenceSequence.getReferenceFile());
final SAMFileWriter out = new SAMFileWriterFactory().makeWriter(header, true, OUTPUT.toPath(), referenceSequence.getHtsPath().toPath());
final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Wrote");
final DownsamplingIterator iterator = DownsamplingIteratorFactory.make(in, STRATEGY, PROBABILITY, ACCURACY, RANDOM_SEED);
final QualityYieldMetricsCollector metricsCollector = new QualityYieldMetricsCollector(true, false, false);
Expand Down Expand Up @@ -296,13 +298,18 @@ protected int doWork() {

@Override
protected ReferenceArgumentCollection makeReferenceArgumentCollection() {
// Override to allow "R" to be hijacked for "RANDOM_SEED"
// Override to allow "R" to be hijacked for "RANDOM_SEED" (tsato: good call)
return new ReferenceArgumentCollection() {
@Argument(doc = "The reference sequence file.", optional=true, common=false)
public File REFERENCE_SEQUENCE;
public PicardHtsPath REFERENCE_SEQUENCE;

@Override
public File getReferenceFile() {
public File getReferenceFile() { // tsato: this should be replaced by getHtsPath
return ReferenceArgumentCollection.getFileSafe(REFERENCE_SEQUENCE, log);
}

@Override
public PicardHtsPath getHtsPath() { // tsato: this should be replaced by getHtsPath
return REFERENCE_SEQUENCE;
}
};
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/picard/util/SequenceDictionaryUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public class SequenceDictionaryUtils {

static public class SamSequenceRecordsIterator implements Iterator<SAMSequenceRecord> {
final private boolean truncateNamesAtWhitespace;
final private ReferenceSequenceFile refSeqFile;
final private ReferenceSequenceFile refSeqFile; // tsato: uh oh
private String genomeAssembly;
private String uri;

Expand Down
10 changes: 6 additions & 4 deletions src/test/java/picard/sam/CreateSequenceDictionaryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.testng.annotations.Test;
import picard.PicardException;
import picard.cmdline.CommandLineProgramTest;
import picard.nio.GATKBucketUtils;
import picard.nio.PicardHtsPath;
import picard.util.GCloudTestUtils;

Expand Down Expand Up @@ -283,19 +284,20 @@ final Path makeTempDictionary(final Path inputFasta, final String dictNamePrefix
return tempDict.toPath();
}

@Test
@Test(groups = "cloud")
public void testCloud() throws Exception {
final String HG19_MINI = GCloudTestUtils.getTestInputPath() + "picard/references/hg19mini.fasta";
// Ideal to copy the file over to staging and test
final String CLOUD_OUTPUT_DIR = GCloudTestUtils.getTestStaging() + "picard/";
final PicardHtsPath output = new PicardHtsPath(CLOUD_OUTPUT_DIR + "test.dict");
final String output = GATKBucketUtils.getTempFilePath(CLOUD_OUTPUT_DIR + "test", ".dict");
// final PicardHtsPath output2 = new PicardHtsPath(CLOUD_OUTPUT_DIR + "test.dict");
// final Path output2 = Files.createTempFile(Paths.get(CLOUD_OUTPUT_DIR) + "tmp", ".tmp");

final String[] argv = {
"REFERENCE=" + HG19_MINI,
"OUTPUT=" + output.getURIString(),
"OUTPUT=" + output,
};
Assert.assertEquals(runPicardCommandLine(argv), 0);
Files.delete(output.toPath());
int d = 3;
}
}

0 comments on commit 36bc7d1

Please sign in to comment.