Skip to content

Commit

Permalink
Merge pull request #411 from broadinstitute/pangolin
Browse files: browse the repository at this point in the history
pangolin v4
  • Loading branch information
dpark01 authored Apr 8, 2022
2 parents 764c9fd + a233a5f commit 24d5fc5
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 35 deletions.
59 changes: 29 additions & 30 deletions pipes/WDL/tasks/tasks_sarscov2.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ task pangolin_one_sample {
File genome_fasta
Int? min_length
Float? max_ambig
Boolean inference_usher=true
String? analysis_mode
Boolean update_dbs_now=false
String docker = "quay.io/staphb/pangolin:3.1.20-pangolearn-2022-02-28"
String docker = "quay.io/staphb/pangolin:4.0.4-pdata-1.2.133"
}
String basename = basename(genome_fasta, ".fasta")
command <<<
Expand All @@ -23,27 +23,24 @@ task pangolin_one_sample {
set -e
fi
date | tee DATE
conda list -n pangolin | grep "usher" | awk -F ' +' '{print$1, $2}' | tee VERSION_PANGO_USHER
pangolin -v | tee VERSION_PANGOLIN
pangolin -pv | tee VERSION_PANGOLEARN
pangolin --all-versions | tr '\n' ';' | cut -f -5 -d ';' | tee VERSION_PANGOLIN_ALL
{ pangolin --all-versions && usher --version; } | grep -v '\*\*\*\*' | grep -v "Pangolin running in" | tr '\n' ';' | cut -f -6 -d ';' | tee VERSION_PANGOLIN_ALL

pangolin "~{genome_fasta}" \
~{true='--usher' false='' inference_usher} \
~{'--analysis-mode ' + analysis_mode} \
--outfile "~{basename}.pangolin_report.csv" \
~{"--min-length " + min_length} \
~{"--max-ambig " + max_ambig} \
--alignment \
--threads $(nproc) \
--verbose

cp sequences.aln.fasta "~{basename}.pangolin_msa.fasta"
cp alignment.fasta "~{basename}.pangolin_msa.fasta"
python3 <<CODE
import csv
#grab output values by column header
with open("~{basename}.pangolin_report.csv", 'rt') as csv_file:
for line in csv.DictReader(csv_file):
with open("VERSION", 'wt') as outf:
with open("PANGO_ASSIGNMENT_VERSION", 'wt') as outf:
pangolin_version=line["pangolin_version"]
version=line["version"]
outf.write(f"pangolin {pangolin_version}; {version}")
Expand All @@ -53,6 +50,10 @@ task pangolin_one_sample {
outf.write(line["conflict"])
with open("PANGOLIN_NOTES", 'wt') as outf:
outf.write(line["note"])
with open("SCORPIO_CALL", 'wt') as outf:
outf.write(line["scorpio_call"])
with open("SCORPIO_NOTES", 'wt') as outf:
outf.write(line["scorpio_notes"])
break
CODE
>>>
Expand All @@ -66,15 +67,14 @@ task pangolin_one_sample {
}
output {
String date = read_string("DATE")
String version = read_string("VERSION")
String pango_lineage = read_string("PANGO_LINEAGE")
String pangolin_conflicts = read_string("PANGOLIN_CONFLICTS")
String pangolin_notes = read_string("PANGOLIN_NOTES")
String pangolin_usher_version = read_string("VERSION_PANGO_USHER")
String pangolin_version = read_string("VERSION_PANGOLIN")
String pangolearn_version = read_string("VERSION_PANGOLEARN")
String scorpio_call = read_string("SCORPIO_CALL")
String scorpio_notes = read_string("SCORPIO_NOTES")
String pangolin_docker = docker
String pangolin_versions = read_string("VERSION_PANGOLIN_ALL")
String pangolin_assignment_version = read_string("PANGO_ASSIGNMENT_VERSION")
File pango_lineage_report = "${basename}.pangolin_report.csv"
File msa_fasta = "~{basename}.pangolin_msa.fasta"
}
Expand All @@ -88,10 +88,10 @@ task pangolin_many_samples {
Array[File]+ genome_fastas
Int? min_length
Float? max_ambig
Boolean inference_usher=true
String? analysis_mode
Boolean update_dbs_now=false
String basename
String docker = "quay.io/staphb/pangolin:3.1.20-pangolearn-2022-02-28"
String docker = "quay.io/staphb/pangolin:4.0.4-pdata-1.2.133"
}
command <<<
set -ex
Expand All @@ -103,37 +103,35 @@ task pangolin_many_samples {
set -e
fi
date | tee DATE
conda list -n pangolin | grep "usher" | awk -F ' +' '{print$1, $2}' | tee VERSION_PANGO_USHER
pangolin -v | tee VERSION_PANGOLIN
pangolin -pv | tee VERSION_PANGOLEARN
pangolin --all-versions | tr '\n' ';' | cut -f -5 -d ';' | tee VERSION_PANGOLIN_ALL
{ pangolin --all-versions && usher --version; } | grep -v '\*\*\*\*' | grep -v "Pangolin running in" | tr '\n' ';' | cut -f -6 -d ';' | tee VERSION_PANGOLIN_ALL
cat ~{sep=" " genome_fastas} > unaligned.fasta
pangolin unaligned.fasta \
~{true='--usher' false='' inference_usher} \
--use-assignment-cache \
~{'--analysis-mode ' + analysis_mode} \
--outfile "~{basename}.pangolin_report.csv" \
~{"--min-length " + min_length} \
~{"--max-ambig " + max_ambig} \
--alignment \
--threads $(nproc) \
--verbose
cp sequences.aln.fasta "~{basename}.pangolin_msa.fasta"
cp alignment.fasta "~{basename}.pangolin_msa.fasta"
python3 <<CODE
import csv, json
#grab output values by column header
with open("~{basename}.pangolin_report.csv", 'rt') as csv_file:
for line in csv.DictReader(csv_file):
with open("VERSION", 'wt') as outf:
with open("PANGO_ASSIGNMENT_VERSION", 'wt') as outf:
pangolin_version=line["pangolin_version"]
version=line["version"]
outf.write(f"pangolin {pangolin_version}; {version}")
break
out_maps = {'lineage':{}, 'conflict':{}, 'note':{}}
out_maps = {'lineage':{}, 'conflict':{}, 'note':{}, 'scorpio_call':{}, 'scorpio_notes':{}}
with open("~{basename}.pangolin_report.csv", 'rt') as csv_file:
with open('IDLIST', 'wt') as outf_ids:
for row in csv.DictReader(csv_file):
for k in ('lineage','conflict','note'):
for k in ('lineage','conflict','note','scorpio_call','scorpio_notes'):
out_maps[k][row['taxon']] = row[k]
outf_ids.write(row['taxon']+'\n')
with open('PANGO_LINEAGE.json', 'wt') as outf:
Expand All @@ -142,12 +140,15 @@ task pangolin_many_samples {
json.dump(out_maps['conflict'], outf)
with open('PANGOLIN_NOTES.json', 'wt') as outf:
json.dump(out_maps['note'], outf)
with open('SCORPIO_CALL.json', 'wt') as outf:
json.dump(out_maps['scorpio_call'], outf)
with open('SCORPIO_NOTES.json', 'wt') as outf:
json.dump(out_maps['scorpio_notes'], outf)
CODE
# gather runtime metrics
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg > CPU_LOAD
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES
>>>
runtime {
docker: docker
Expand All @@ -161,17 +162,15 @@ task pangolin_many_samples {
Map[String,String] pango_lineage = read_json("PANGO_LINEAGE.json")
Map[String,String] pangolin_conflicts = read_json("PANGOLIN_CONFLICTS.json")
Map[String,String] pangolin_notes = read_json("PANGOLIN_NOTES.json")
Map[String,String] scorpio_call = read_json("SCORPIO_CALL.json")
Map[String,String] scorpio_notes = read_json("SCORPIO_NOTES.json")
Array[String] genome_ids = read_lines("IDLIST")
String date = read_string("DATE")
String version = read_string("VERSION")
String pangolin_assignment_version = read_string("PANGO_ASSIGNMENT_VERSION")
String pangolin_docker = docker
String pangolin_versions = read_string("VERSION_PANGOLIN_ALL")
String pangolin_usher_version = read_string("VERSION_PANGO_USHER")
String pangolin_version = read_string("VERSION_PANGOLIN")
String pangolearn_version = read_string("VERSION_PANGOLEARN")
File pango_lineage_report = "${basename}.pangolin_report.csv"
File msa_fasta = "~{basename}.pangolin_msa.fasta"
Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000)
Int runtime_sec = ceil(read_float("UPTIME_SEC"))
String cpu_load = read_string("CPU_LOAD")
}
Expand Down
4 changes: 3 additions & 1 deletion pipes/WDL/workflows/sarscov2_batch_relineage.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ workflow sarscov2_batch_relineage {
Array[String] metadata = [
sample_sanitized,
pangolin_many_samples.pango_lineage[sample_sanitized],
pangolin_many_samples.scorpio_call[sample_sanitized],
nextclade_many_samples.nextclade_clade[sample_sanitized],
nextclade_many_samples.aa_subs_csv[sample_sanitized],
nextclade_many_samples.aa_dels_csv[sample_sanitized],
Expand All @@ -62,7 +63,8 @@ workflow sarscov2_batch_relineage {
}
Array[String] meta_header = [
'sample_sanitized',
'pango_lineage', 'nextclade_clade', 'nextclade_aa_subs', 'nextclade_aa_dels',
'pango_lineage', 'scorpio_call',
'nextclade_clade', 'nextclade_aa_subs', 'nextclade_aa_dels',
'pangolin_version', 'nextclade_version'
]
Expand Down
5 changes: 2 additions & 3 deletions pipes/WDL/workflows/sarscov2_lineages.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,10 @@ workflow sarscov2_lineages {
String pango_lineage = pangolin_one_sample.pango_lineage
String pangolin_conflicts = pangolin_one_sample.pangolin_conflicts
String pangolin_notes = pangolin_one_sample.pangolin_notes
String scorpio_call = pangolin_one_sample.scorpio_call
String scorpio_notes = pangolin_one_sample.scorpio_notes
File pango_lineage_report = pangolin_one_sample.pango_lineage_report
String pangolin_usher_version = pangolin_one_sample.pangolin_usher_version
String pangolin_docker = pangolin_one_sample.pangolin_docker
String pangolin_version = pangolin_one_sample.pangolin_version
String pangolearn_version = pangolin_one_sample.pangolearn_version
String pangolin_versions = pangolin_one_sample.pangolin_versions
}
}
2 changes: 1 addition & 1 deletion requirements-modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ broadinstitute/beast-beagle-cuda=1.10.5pre
broadinstitute/ncbi-tools=2.10.7.10
nextstrain/base=build-20211012T204409Z
andersenlabapps/ivar=1.3.1
quay.io/staphb/pangolin=3.1.20-pangolearn-2022-02-28
quay.io/staphb/pangolin=4.0.4-pdata-1.2.133
nextstrain/nextclade=1.11.0

0 comments on commit 24d5fc5

Please sign in to comment.