Skip to content

Commit

Permalink
Addressed an edge case in ScoreVariantAnnotations that can occur when…
Browse files Browse the repository at this point in the history
… one variant type is not present in the input VCF. (#9112)
  • Loading branch information
samuelklee authored Mar 4, 2025
1 parent 8a032ba commit 6ef2e1f
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -539,12 +539,14 @@ private static void scoreVariantTypeAndSetElementsOfAllScores(final List<String>
final List<Boolean> isVariantType,
final VariantAnnotationsScorer variantTypeScorer,
final List<Double> allScores) {
final File variantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, allAnnotations, isVariantType);
final File variantTypeScoresFile = IOUtils.createTempFile("temp", ".scores.hdf5");
variantTypeScorer.score(variantTypeAnnotationsFile, variantTypeScoresFile); // TODO we do not fail until here in the case of mismatched annotation names; we could fail earlier
final double[] variantTypeScores = VariantAnnotationsScorer.readScores(variantTypeScoresFile);
final Iterator<Double> variantTypeScoresIterator = Arrays.stream(variantTypeScores).iterator();
IntStream.range(0, allScores.size()).filter(isVariantType::get).forEach(i -> allScores.set(i, variantTypeScoresIterator.next()));
if (isVariantType.stream().anyMatch(x -> x)) {
final File variantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, allAnnotations, isVariantType);
final File variantTypeScoresFile = IOUtils.createTempFile("temp", ".scores.hdf5");
variantTypeScorer.score(variantTypeAnnotationsFile, variantTypeScoresFile); // TODO we do not fail until here in the case of mismatched annotation names; we could fail earlier
final double[] variantTypeScores = VariantAnnotationsScorer.readScores(variantTypeScoresFile);
final Iterator<Double> variantTypeScoresIterator = Arrays.stream(variantTypeScores).iterator();
IntStream.range(0, allScores.size()).filter(isVariantType::get).forEach(i -> allScores.set(i, variantTypeScoresIterator.next()));
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,36 @@ public void testNoVariantsInInput() {
Assert.assertTrue(new File(outputPrefix + ".vcf.idx").exists());
}

/**
 * Checks that scoring succeeds when the requested region of the input contains no variants of one of the
 * requested types. In that case we must not attempt to create the temporary annotations or scores HDF5 files
 * for the missing type; doing so would fail, because HDF5 files cannot be created with empty arrays/matrices.
 */
@Test(groups = {"python"}) // python environment is required to run tool
public void testNoVariantsOfOneTypeInInput() {
    final File scoreDir = createTempDir("score");
    final String prefix = String.format("%s/test", scoreDir);

    // request both variant types, but restrict scoring to a region in which the test input VCF does not have indels
    final ArgumentsBuilder args = BASE_ARGUMENTS_BUILDER_SUPPLIER.get();
    args.add(LabeledVariantAnnotationsWalker.MODE_LONG_NAME, VariantType.SNP);
    args.add(LabeledVariantAnnotationsWalker.MODE_LONG_NAME, VariantType.INDEL);
    args.add(StandardArgumentDefinitions.INTERVALS_LONG_NAME, "chr1:1-13000");
    args.addOutput(prefix);

    final String trainedModelPrefix = new File(INPUT_FROM_TRAIN_EXPECTED_TEST_FILES_DIR,
            "extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF").toString();

    // apply the shared argument decorators one after another, feeding each result into the next:
    // model prefix -> non-allele-specific annotations -> SNP mode/resources -> INDEL mode/resources
    ArgumentsBuilder decorated = ADD_MODEL_PREFIX.apply(args, trainedModelPrefix);
    decorated = ExtractVariantAnnotationsIntegrationTest.ADD_NON_ALLELE_SPECIFIC_ANNOTATIONS.apply(decorated);
    decorated = ExtractVariantAnnotationsIntegrationTest.ADD_SNP_MODE_AND_RESOURCES.apply(decorated);
    ExtractVariantAnnotationsIntegrationTest.ADD_INDEL_MODE_AND_RESOURCES.apply(decorated);

    runCommandLine(args);

    // the tool should still produce all of its usual outputs
    final String[] expectedOutputSuffixes = {
            ScoreVariantAnnotations.ANNOTATIONS_HDF5_SUFFIX,
            ScoreVariantAnnotations.SCORES_HDF5_SUFFIX,
            ".vcf",
            ".vcf.idx"
    };
    for (final String suffix : expectedOutputSuffixes) {
        Assert.assertTrue(new File(prefix + suffix).exists());
    }
}

@Test(expectedExceptions = PythonScriptExecutorException.class, groups = {"python"}) // python environment is required to run tool
public void testAnnotationsDoNotMatchThoseUsedToTrainModel() {
final File outputDir = createTempDir("score");
Expand Down

0 comments on commit 6ef2e1f

Please sign in to comment.