Skip to content

Commit f8a6416

Browse files
authored Dec 2, 2021
Parse quast report to csv (#16)
* Parse quast report to csv * Update README, fix version parsing
1 parent 7b2c6f6 commit f8a6416

File tree

4 files changed

+46
-21
lines changed

4 files changed

+46
-21
lines changed
 

‎README.md

+12-13
Original file line number | Diff line number | Diff line change
@@ -62,10 +62,9 @@ sample-01
6262
├── sample-01_20211125165316_provenance.yml
6363
├── sample-01_fastp.csv
6464
├── sample-01_fastp.json
65-
├── sample-01_prokka.gbk
66-
├── sample-01_prokka.gff
67-
├── sample-01_quast.json
68-
├── sample-01_quast.tsv
65+
├── sample-01_shovill_prokka.gbk
66+
├── sample-01_shovill_prokka.gff
67+
├── sample-01_shovill_quast.csv
6968
├── sample-01_shovill.fa
7069
└── sample-01_shovill.log
7170
```
@@ -76,16 +75,16 @@ Including the tool name suffixes to output files allows re-analysis of the same
7675
sample-01
7776
├── sample-01_20211125165316_provenance.yml
7877
├── sample-01_20211128122118_provenance.yml
79-
├── sample-01_bakta.gbk
80-
├── sample-01_bakta.gff
81-
├── sample-01_bakta.json
82-
├── sample-01_bakta.log
78+
├── sample-01_unicycler_bakta.gbk
79+
├── sample-01_unicycler_bakta.gff
80+
├── sample-01_unicycler_bakta.json
81+
├── sample-01_unicycler_bakta.log
8382
├── sample-01_fastp.csv
8483
├── sample-01_fastp.json
85-
├── sample-01_prokka.gbk
86-
├── sample-01_prokka.gff
87-
├── sample-01_quast.json
88-
├── sample-01_quast.tsv
84+
├── sample-01_shovill_prokka.gbk
85+
├── sample-01_shovill_prokka.gff
86+
├── sample-01_shovill_quast.csv
87+
├── sample-01_unicycler_quast.csv
8988
├── sample-01_shovill.fa
9089
├── sample-01_shovill.log
9190
├── sample-01_unicycler.fa
@@ -104,7 +103,7 @@ For each pipeline invocation, each sample will produce a `provenance.yml` file w
104103
- tool_name: prokka
105104
tool_version: 1.14.5
106105
- tool_name: quast
107-
tool_version: v5.0.2
106+
tool_version: 5.0.2
108107
- input_filename: sample-01_R1.fastq.gz
109108
sha256: 4ac3055ac5f03114a005aff033e7018ea98486cbebdae669880e3f0511ed21bb
110109
- input_filename: sample-01_R2.fastq.gz

‎bin/parse_quast_report.py

+29-1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import collections
55
import csv
66
import json
7+
import sys
78

89

910
def parse_transposed_quast_report(transposed_quast_report_path):
@@ -92,8 +93,35 @@ def main():
9293
parser.add_argument('transposed_quast_report')
9394
args = parser.parse_args()
9495

96+
output_fieldnames = [
97+
'assembly_id',
98+
'total_length',
99+
'num_contigs',
100+
'largest_contig',
101+
'assembly_N50',
102+
'assembly_N75',
103+
'assembly_L50',
104+
'assembly_L75',
105+
'num_contigs_gt_0_bp',
106+
'num_contigs_gt_1000_bp',
107+
'num_contigs_gt_5000_bp',
108+
'num_contigs_gt_10000_bp',
109+
'num_contigs_gt_25000_bp',
110+
'num_contigs_gt_50000_bp',
111+
'total_length_gt_0_bp',
112+
'total_length_gt_1000_bp',
113+
'total_length_gt_5000_bp',
114+
'total_length_gt_10000_bp',
115+
'total_length_gt_25000_bp',
116+
'total_length_gt_50000_bp',
117+
'num_N_per_100_kb',
118+
]
119+
95120
report = parse_transposed_quast_report(args.transposed_quast_report)
96-
print(json.dumps(report, indent=2))
121+
writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames)
122+
writer.writeheader()
123+
for record in report:
124+
writer.writerow(record)
97125

98126

99127
if __name__ == '__main__':

‎modules/quast.nf

+4-6
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,6 @@ process quast {
22

33
tag { sample_id }
44

5-
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.tsv", mode: 'copy'
6-
75
input:
86
tuple val(sample_id), path(assembly), val(assembler)
97

@@ -13,7 +11,7 @@ process quast {
1311

1412
script:
1513
"""
16-
printf -- "- tool_name: quast\\n tool_version: \$(quast --version | cut -d ' ' -f 2)\\n" > ${sample_id}_${assembler}_quast_provenance.yml
14+
printf -- "- tool_name: quast\\n tool_version: \$(quast --version | cut -d ' ' -f 2 | tr -d 'v')\\n" > ${sample_id}_${assembler}_quast_provenance.yml
1715
quast --threads ${task.cpus} ${assembly} --space-efficient --fast --output-dir ${sample_id}
1816
mv ${sample_id}/transposed_report.tsv ${sample_id}_${assembler}_quast.tsv
1917
"""
@@ -25,16 +23,16 @@ process parse_quast_report {
2523

2624
executor 'local'
2725

28-
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.json", mode: 'copy'
26+
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.csv", mode: 'copy'
2927

3028
input:
3129
tuple val(sample_id), path(quast_report), val(assembler)
3230

3331
output:
34-
tuple val(sample_id), path("${sample_id}_${assembler}_quast.json")
32+
tuple val(sample_id), path("${sample_id}_${assembler}_quast.csv")
3533

3634
script:
3735
"""
38-
parse_quast_report.py ${quast_report} > ${sample_id}_${assembler}_quast.json
36+
parse_quast_report.py ${quast_report} > ${sample_id}_${assembler}_quast.csv
3937
"""
4038
}

‎modules/unicycler.nf

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ process unicycler {
1515

1616
script:
1717
"""
18-
printf -- "- tool_name: unicycler\\n tool_version: \$(unicycler --version | cut -d ' ' -f 2)\\n" > ${sample_id}_unicycler_provenance.yml
18+
printf -- "- tool_name: unicycler\\n tool_version: \$(unicycler --version | cut -d ' ' -f 2 | tr -d 'v')\\n" > ${sample_id}_unicycler_provenance.yml
1919
unicycler --threads ${task.cpus} -1 ${reads_1} -2 ${reads_2} -o ${sample_id}_assembly
2020
sed 's/^>/>${sample_id}_/' ${sample_id}_assembly/assembly.fasta > ${sample_id}_unicycler.fa
2121
cp ${sample_id}_assembly/assembly.gfa ${sample_id}_unicycler.gfa

0 commit comments

Comments
 (0)
Please sign in to comment.