-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpertpipe.py
234 lines (205 loc) · 8.46 KB
/
pertpipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import datetime
import logging
import os
import sys
import warnings
import shutil
from scripts import assists
from scripts import arguments
from scripts import virulence_info
from scripts import mres_blast
from scripts import mres_map
__version__ = "1.0.0"
warnings.simplefilter(action="ignore", category=FutureWarning)
logging.getLogger().setLevel(logging.INFO)
formatter = logging.Formatter(
"pertpipe:%(levelname)s:%(asctime)s: %(message)s", datefmt="%y/%m/%d %I:%M:%S %p"
)
dependency_list = ["abricate", "spades.py", "mlst", "minimap2", "samtools", "bcftools", "prokka", "kallisto"]
ref_list = []
def pertpipe(args):
"""
Running order of pertpipe
"""
now = datetime.datetime.now()
date = now.strftime("%Y%m%d")
is_assembly = bool(args.fasta is not None)
is_reads = bool(args.R1 is not None)
# set outdir defaults - if no outdir is set, it will default to either the fasta or R1 location
if args.outdir is None and args.fasta is not None:
default = os.path.dirname(args.fasta)
outdir = default
elif args.outdir is None and args.R1 is not None:
default = os.path.dirname(args.R1)
outdir = default
else:
outdir = args.outdir
# force creation of new folder within set outdir
maindir = outdir
#newdir = maindir + "/bams"
folder_exists = os.path.exists(maindir)
if not folder_exists:
os.makedirs(maindir)
logging.info("Making output folder")
else:
logging.info(f"Folder exists")
# error log
errorlog = os.path.join(outdir, "pertpipe_" + date + ".log")
# Clear existing handlers
logger = logging.getLogger()
if logger.hasHandlers():
logger.handlers.clear()
stdout_handler = logging.StreamHandler(sys.stdout)
file_handler = logging.FileHandler(errorlog, mode="w+")
for handler in [stdout_handler, file_handler]:
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
logger.addHandler(handler)
# cmd checks
if is_reads is True:
if args.R2 is None:
logging.error("R2 was not provided, please provide the paired reads")
sys.exit(1)
# launch line
logging.info(
"Launching pertpipe v%s and writing output files to directory %s",
__version__,
outdir,
)
# checking file integrity and existence of output directory
if all(item is not None for item in [args.fasta, args.R1, args.R2]):
assists.check_files(args.R1)
assists.check_files(args.R2)
assists.check_files(args.fasta)
logging.info("Found fasta, R1 and R2, skipping assembly")
# checking all the versions and installations of dependencies.
logging.info("Checking installs of dependencies")
for dependency in dependency_list:
assists.check_dependencies(dependency)
if "abricate" in dependency_list:
assists.check_abricate()
elif "mlst" in dependency_list:
assists.check_mlst(args.datadir)
if is_reads and is_assembly is False:
# spades assembly
spades_outdir = maindir + "/spades"
folder_exists = os.path.exists(spades_outdir)
if not folder_exists:
os.makedirs(spades_outdir)
logging.info("Making spades output folder")
else:
logging.info(f"Spades folder exists")
spades_result = assists.check_spades_finished(spades_outdir)
if spades_result is False and args.meta is False:
spades = f"spades.py --careful --only-assembler --pe1-1 {args.R1} --pe1-2 {args.R2} -o {maindir}/spades"
assists.run_cmd(spades)
elif spades_result is False and args.meta is True:
spades = f"spades.py --meta --only-assembler --pe1-1 {args.R1} --pe1-2 {args.R2} -o {maindir}/spades"
assists.run_cmd(spades)
else:
logging.info("Spades has already finished for this sample. Skipping.")
assembly = spades_outdir + "/contigs.fasta"
assists.check_files(assembly)
closed = assists.check_closed_genome(assembly)
elif is_assembly:
assembly = args.fasta
closed = assists.check_closed_genome(assembly)
if is_reads and is_assembly is False and args.meta is True:
megahit_outdir = maindir + "/megahit"
folder_exists = os.path.exists(megahit_outdir)
megahit_result = assists.check_megahit_finished(megahit_outdir)
if folder_exists and megahit_result is False:
logging.info(f"Removing existing failed megahit folder")
shutil.rmtree(megahit_outdir, ignore_errors=True)
if megahit_result is False:
megahit = f"megahit -1 {args.R1} -2 {args.R2} -o {megahit_outdir}"
assists.run_cmd(megahit)
else:
logging.info("Megahit has already finished for this sample. Skipping.")
megahit = megahit_outdir + "/final.contigs.fa"
#assists.megahit_assembly_graphs(megahit_outdir)
assists.check_files(megahit)
#closed = assists.check_closed_genome(assembly)
prokka_outdir = maindir + "/prokka"
folder_exists = os.path.exists(prokka_outdir)
name = os.path.basename(maindir)
if not folder_exists:
os.makedirs(prokka_outdir)
logging.info("Making prokka output folder")
else:
logging.info(f"Prokka folder exists")
prokka_result = assists.check_prokka_finished(prokka_outdir, name)
if prokka_result is False and args.meta is False:
logging.info(f"Running Prokka")
prokka = f"prokka --outdir {prokka_outdir} --force --cpus 8 --prefix {name} --locustag {name} --compliant --gcode 11 {assembly}"
assists.run_cmd(prokka)
elif prokka_result is False and args.meta is True:
logging.info(f"Running Prokka in Metagenomics mode")
prokka = f"prokka --outdir {prokka_outdir} --force --cpus 8 --prefix {name} --locustag {name} --compliant --gcode 11 {assembly} --metagenome"
assists.run_cmd(prokka)
else:
logging.info("Prokka has already finished for this sample. Skipping.")
prn_outdir = maindir + "/analysis"
folder_exists = os.path.exists(prn_outdir)
if not folder_exists:
os.makedirs(prn_outdir)
logging.info("Making analysis output folder")
else:
logging.info(f"Analysis folder exists")
final_dict = {
"Folder": maindir
}
res_dict = virulence_info.virulence_analysis(assembly, prn_outdir, closed, args.datadir, prokka_outdir)
final_dict.update(res_dict)
# 23s rRNA for macrolide resistance
analysis_outdir = maindir + "/analysis"
if args.meta is False:
mutation_list, copies, detected = mres_blast.mres_detection(assembly, analysis_outdir, args.meta)
else:
mutation_list, copies, detected = mres_blast.mres_detection(megahit, analysis_outdir, args.meta)
if is_reads is True:
res_dict = mres_map.mres_map(args.R1, args.R2, analysis_outdir, mutation_list, args.meta)
elif is_assembly:
logging.info(f"Assembly only mode")
if mutation_list != []:
positions = ",".join(mutation_list)
logging.info(f"23s mutation occurs as a {positions} in {copies} copies")
if "A2037G" in mutation_list:
res_dict = {
"Resistance": "Resistant",
"Mutation": positions,
"Copy No": f"{str(copies)} copies",
}
else:
res_dict = {
"Resistance": "Mutations in 23S rRNA detected",
"Mutation": positions,
"Copy No": f"{str(copies)}",
}
else:
res_dict = {
"Resistance": "Susceptible",
"Mutation": "N/A",
"Copy No": "N/A"
}
else:
res_dict = {
"Resistance": "Susceptible",
"Mutation": "N/A",
"Copy No": "N/A"
}
final_dict.update(res_dict)
# Extract headers and values
headers = list(final_dict.keys())
values = list(final_dict.values())
tsv_lines = ["\t".join(headers), "\t".join(values)]
tsv_string = "\n".join(tsv_lines)
output_path = outdir + "/vir_res.tsv"
with open(output_path, 'w') as output_file:
output_file.write(tsv_string)
logging.info(f"Writing information to {output_path}")
logging.info(f"Complete!")
if __name__ == "__main__":
parser = arguments.create_parser() # pylint: disable=E1101
args = parser.parse_args()
pertpipe(args)