Mercurial > repos > iss > eurl_vtec_wgs_pt
comparison EURL_VTEC_WGS_PT.py @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children | 444b0421bbdc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c6bab5103a14 |
---|---|
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
############################################################################
# Istituto Superiore di Sanita'
# European Union Reference Laboratory (EU-RL) for Escherichia coli, including Verotoxigenic E. coli (VTEC)
# Developer: Arnold Knijn arnold.knijn@iss.it
############################################################################
"""
10 | |
import argparse
import datetime
import fileinput
import os
import shlex
import shutil
import subprocess
import sys

import HTML
19 | |
# Prefix used for the dataset links ("<BASE_URL>/datasets/<id>/display/...")
# that are written into the HTML report.
BASE_URL = 'https://aries.iss.it'
# Absolute directory of this script; the helper scripts ("scripts/"), the
# reference notes ("data/") and the report HTML fragments are addressed
# relative to it.
TOOL_DIR = os.path.dirname(os.path.abspath(__file__))
22 | |
def insertFile(filename, report):
    """Copy the text file *filename* verbatim into *report*.

    Args:
        filename: path of the text file to read.
        report: writable text stream; only its ``write`` method is used.

    Raises:
        OSError: if *filename* cannot be opened.
    """
    with open(filename) as html_in:
        for line in html_in:
            report.write(line)
27 | |
def insertFileAsTable(filename, report, hasheader=False, tabclass="table table-rep"):
    """Write the tab-separated file *filename* to *report* as an HTML table.

    The file is split into rows (one per line) and cells (tab-separated) and
    handed to ``insertTable`` together with the remaining arguments.

    Args:
        filename: path of the tab-separated text file.
        report: writable text stream receiving the HTML.
        hasheader: when true, the first row is passed on as the header row.
        tabclass: value of the table's ``class`` attribute.

    Raises:
        OSError: if *filename* cannot be opened.
    """
    with open(filename) as table_in:
        # Remove the line terminator before splitting; otherwise the last
        # cell of every row would carry a trailing newline into the table.
        table_data = [row.rstrip('\r\n').split('\t') for row in table_in]
    insertTable(table_data, report, hasheader, tabclass)
32 | |
def insertTable(table_data, report, hasheader=False, tabclass="table table-rep"):
    """Render *table_data* with ``HTML.table`` and write the markup to *report*.

    Args:
        table_data: list of rows, each row a list of cell values.
        report: writable text stream receiving the HTML.
        hasheader: when true, ``table_data[0]`` is used as the header row and
            the remaining rows as the table body.
        tabclass: value of the table's ``class`` attribute.
    """
    attribs = {'class': tabclass}
    # An empty table has no row to promote to a header; render it as a plain
    # table instead of failing with IndexError on table_data[0].
    if hasheader and table_data:
        htmlcode = HTML.table(table_data[1:], attribs=attribs, header_row=table_data[0])
    else:
        htmlcode = HTML.table(table_data, attribs=attribs)
    report.write(htmlcode)
39 | |
def openFileAsTable(filename):
    """Read the tab-separated file *filename* into a list of rows.

    Each line becomes one row; each row is the list of its tab-separated
    cells with trailing whitespace (including the line terminator) removed.
    A blank line yields the row ``['']``; an empty file yields ``[]``.

    Args:
        filename: path of the tab-separated text file.

    Returns:
        A list of lists of strings.

    Raises:
        OSError: if *filename* cannot be opened.
    """
    with open(filename) as table_in:
        return [[col.rstrip() for col in row.split('\t')] for row in table_in]
44 | |
def _build_parser():
    """Return the argument parser describing the command-line interface."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--serotyping', dest='serotyping', help='perform serotyping', action='store_true')
    parser.add_argument('--virulotyping', dest='virulotyping', help='perform virulotyping', action='store_true')
    parser.add_argument('--shigatoxintyping', dest='shigatoxintyping', help='perform shigatoxintyping', action='store_true')
    parser.add_argument('--amrtyping', dest='amrtyping', help='perform amrtyping', action='store_true')
    parser.add_argument('-1', '--input1', dest='input1', help='forward or single-end reads file in Sanger FASTQ format')
    parser.add_argument('--input1_ext', dest='input1_ext', help='extension of forward or single-end reads file in Sanger FASTQ format')
    parser.add_argument('--input1_name', dest='input1_name', help='name of forward or single-end reads file in Sanger FASTQ format')
    parser.add_argument('-2', '--input2', dest='input2', help='reverse reads file in Sanger FASTQ format')
    parser.add_argument('--input2_ext', dest='input2_ext', help='extension of reverse reads file in Sanger FASTQ format')
    parser.add_argument('--input2_name', dest='input2_name', help='name of reverse reads file in Sanger FASTQ format')
    parser.add_argument('--html1', dest='html1', help='html FASTQC file')
    parser.add_argument('--html1_id', dest='html1_id', help='html FASTQC file id')
    parser.add_argument('--html1_path', dest='html1_path', help='html FASTQC file path')
    parser.add_argument('--text1', dest='text1', help='text FASTQC file')
    parser.add_argument('--html2', dest='html2', help='html FASTQC file')
    parser.add_argument('--html2_id', dest='html2_id', help='html FASTQC file id')
    parser.add_argument('--html2_path', dest='html2_path', help='html FASTQC file path')
    parser.add_argument('--text2', dest='text2', help='text FASTQC file')
    parser.add_argument('--contigs', dest='contigs', help='Assembly contigs')
    parser.add_argument('--quast', dest='quast', help='Quast report')
    parser.add_argument('--log', dest='logfile', help='log file')
    parser.add_argument('--virulotyper', dest='virulotyper', help='Virulotyping Mapping reads')
    parser.add_argument('--virulotyper_id', dest='virulotyper_id', help='Virulotyping Mapping reads id')
    parser.add_argument('--stx', dest='stx', help='Shiga toxin')
    parser.add_argument('--mlstsevenloci', dest='mlstsevenloci', help='Multi Locus Alleles table')
    parser.add_argument('--amr', dest='amrgenes', help='AMR genes')
    parser.add_argument('--amr_id', dest='amr_id', help='AMR file id')
    parser.add_argument('--antigen_O', dest='antigen_O', help='Antigen for O')
    parser.add_argument('--antigen_H', dest='antigen_H', help='Antigen for H')
    parser.add_argument('--output', dest='output', help='output report html file')
    return parser


def _run_fastqc(reads, out_dir, html_out, text_out, ext, name):
    """Run scripts/rgFastQC.py on one reads file; every value is shell-quoted."""
    q = shlex.quote
    subprocess.call(
        "python " + q(TOOL_DIR + "/scripts/rgFastQC.py")
        + " -i " + q(reads) + " -d " + q(out_dir) + " -o " + q(html_out)
        + " -t " + q(text_out) + " -f " + q(ext) + " -j " + q(name)
        + " -e fastqc",
        shell=True)


def _read_db_note(name):
    """Return the text of data/<name> via ``cat`` (empty string if cat prints nothing)."""
    return os.popen("cat " + shlex.quote(TOOL_DIR + "/data/" + name)).read()


def _collect_antigen_hits(antigen, destination):
    """Filter the serogroup_<antigen> hits and return them as a table.

    Rows whose 4th field exceeds 800 are kept (fields 2, 3, 4 and 16),
    sorted, de-duplicated, appended to serogroup_OH_fcd and finally moved to
    *destination*.
    """
    prefix = "serogroup_" + antigen
    subprocess.call("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + prefix + " | sort -nrk 2 -nrk 3 > " + prefix + "_fc", shell=True)
    subprocess.call("awk -F , '!seen[$0]++' " + prefix + "_fc > " + prefix + "_fcd", shell=True)
    hits = openFileAsTable(prefix + "_fcd")
    subprocess.call("cat " + prefix + "_fcd >> serogroup_OH_fcd", shell=True)
    shutil.move(prefix + "_fcd", destination)
    return hits


def _summarise_stx_subtypes(blast_hits, subtype_table):
    """Return the space-separated, sorted Stx subtype labels for *blast_hits*.

    *blast_hits* rows are (sequence id, percent identity, ...); rows of
    *subtype_table* map a sequence id (column 0) to a subtype name (column 1).
    Subtypes hit with 100 % identity are listed by name; remaining stx1*/stx2*
    subtypes are listed as "name(identity)". Each subtype appears once.
    """
    labels = []
    seen = set()
    # Exact matches take precedence over partial ones.
    for hit in blast_hits:
        if float(hit[1]) == 100:
            for entry in subtype_table:
                if entry[0] == hit[0] and entry[1] not in seen:
                    labels.append(entry[1])
                    seen.add(entry[1])
    # Partial matches, annotated with their percent identity.
    for hit in blast_hits:
        for entry in subtype_table:
            if entry[0] == hit[0] and entry[1] not in seen:
                if entry[1][0:4] in ("stx1", "stx2"):
                    labels.append(entry[1] + "(" + str(float(hit[1])) + ")")
                    seen.add(entry[1])
    labels.sort()
    return " ".join(labels)


def _run_pipeline(args, log):
    """Run QC, trimming, assembly and the requested typing steps.

    Tool versions and parameters are written to the open stream *log*.
    External commands are run through the shell (they rely on ``${VAR:-x}``
    expansion, pipes and redirections); their exit codes are not checked.

    Returns:
        (sequence_typing, shigatoxin_typing, sero_typing_o, sero_typing_h) as
        tables from ``openFileAsTable``; tables of steps that were not
        requested are empty lists.
    """
    q = shlex.quote
    scripts = TOOL_DIR + "/scripts/"

    log.write("EURL VTEC WGS PT v3.2\n\nTool versions\n=============\n")
    os.system("ln -s $(readlink -e $(which trimmomatic)).jar trimmomatic.jar")
    # FASTQC
    _run_fastqc(args.input1, args.html1_path, args.html1, args.text1, args.input1_ext, args.input1_name)
    log.write(os.popen("fastqc -v").read())

    trimmomatic = "java ${_JAVA_OPTIONS:--Xmx8G} -jar trimmomatic.jar "
    spades = ("perl " + q(scripts + "spades.pl")
              + " spades_contigs spades_contig_stats spades_scaffolds spades_scaffold_stats spades_log NODE"
              + " spades.py --disable-gzip-output --isolate -t ${GALAXY_SLOTS:-16} ")
    if args.input2:
        # FASTQC
        _run_fastqc(args.input2, args.html2_path, args.html2, args.text2, args.input2_ext, args.input2_name)
        # TRIMMING
        subprocess.call(trimmomatic + "PE -threads ${GALAXY_SLOTS:-6} -phred33 " + q(args.input1) + " " + q(args.input2) + " trimmed1.fq trimmed1unpaired trimmed2.fq trimmed2unpaired SLIDINGWINDOW:5:20 LEADING:3 TRAILING:3 MINLEN:36", shell=True)
        log.write("\nTrimmomatic v0.39\n")
        log.write("parameters: phred33 SLIDINGWINDOW:5:20 LEADING:3 TRAILING:3 MINLEN:36\n\n")
        # ASSEMBLY
        subprocess.call(spades + "--pe1-ff --pe1-1 trimmed1.fq --pe1-2 trimmed2.fq", shell=True)
        assembly_params = "--isolate, pe1-ff, pe1-1, pe1-2 filter_repeats"
    else:
        # TRIMMING
        subprocess.call(trimmomatic + "SE -threads ${GALAXY_SLOTS:-6} -phred33 " + q(args.input1) + " trimmed1.fq SLIDINGWINDOW:5:20 LEADING:3 TRAILING:3 MINLEN:55", shell=True)
        log.write("\nTrimmomatic v0.39\n")
        log.write("parameters: phred33 SLIDINGWINDOW:5:20 LEADING:3 TRAILING:3 MINLEN:55\n\n")
        # ASSEMBLY
        subprocess.call(spades + "--iontorrent -s trimmed1.fq", shell=True)
        assembly_params = "--isolate, --iontorrent filter_repeats"
    # Repeat filtering is identical for paired-end and single-end assemblies.
    subprocess.call("perl " + q(scripts + "filter_spades_repeats.pl") + " -i spades_contigs -t spades_contig_stats -c 0.33 -r 1.75 -l 1000 -o output_with_repeats -u output_without_repeats -n repeat_sequences_only -e 5000 -f discarded_sequences -s summary", shell=True)
    shutil.move("output_without_repeats", args.contigs)
    log.write(os.popen("spades.py -v").read())
    log.write("parameters: " + assembly_params + "\n\n")
    # QUAST
    subprocess.call("quast --threads 4 -o outputdir --est-ref-size 5000000 --min-contig 500 -l " + q(args.input1_name) + " --contig-thresholds 0,1000 " + q(args.contigs), shell=True)
    shutil.move("outputdir/report.tsv", args.quast)

    if args.virulotyping:
        # VIRULOTYPER
        reads = q(args.input1)
        if args.input2:
            reads += " " + q(args.input2)
        patho_cmd = ("python " + q(scripts + "patho_typing.py") + " -s Escherichia coli -f " + reads
                     + " -o output_dir -j 4 --minGeneCoverage 90 --minGeneIdentity 90 --minGeneDepth 15")
        # The whole python command is handed to patho_typing.pl as ONE
        # argument. NOTE(review): the inner quoting assumes the perl wrapper
        # passes that argument to a shell - confirm against patho_typing.pl.
        subprocess.call("perl " + q(scripts + "patho_typing.pl") + " " + q(patho_cmd), shell=True)
        subprocess.call("(head -n 1 pathotyper_rep_tot_tab && tail -n +2 pathotyper_rep_tot_tab | sort -k 2rn) > " + q(args.virulotyper), shell=True)
        log.write("\n\nViruloTyper\n===========\npatho_typing v1.0\n")
        log.write("parameters: minGeneCoverage=90, minGeneIdentity=90, minGeneDepth=15\n\n")
        log.write(_read_db_note("ViruloTyping_db.txt"))

    # SEQUENCETYPER (always run: the report summary needs its result)
    subprocess.call("mlst --legacy --scheme ecoli " + q(args.contigs) + " | cut -f3,4,5,6,7,8,9,10 > " + q(args.mlstsevenloci), shell=True)
    sequence_typing = openFileAsTable(args.mlstsevenloci)
    log.write("\n\nSequence Typer\n==============\n")
    log.write(os.popen("mlst -v").read())
    log.write("\n")
    log.write(_read_db_note("SequenceTyping_db.txt"))

    shigatoxin_typing = []
    if args.shigatoxintyping:
        # SHIGATOXIN TYPER - CONSENSUS
        if args.input2:
            subprocess.call("sh " + q(scripts + "stx_subtype_pe.sh") + " " + q(TOOL_DIR) + " trimmed1.fq trimmed2.fq " + q(args.contigs), shell=True)
        else:
            subprocess.call("sh " + q(scripts + "stx_subtype_se.sh") + " " + q(TOOL_DIR) + " trimmed1.fq " + q(args.contigs), shell=True)
        # SHIGATOXIN SEQUENCE SEARCH
        subprocess.call("sh " + q(scripts + "stx_subtype_fa.sh") + " " + q(TOOL_DIR) + " stx.fasta", shell=True)
        # Prepend a header row to the hit list for the output dataset.
        subprocess.call("echo 'sseqid\tpident\tlength\tpositive' > shigatoxin_fct", shell=True)
        subprocess.call("cat shigatoxin_fc >> shigatoxin_fct", shell=True)
        shutil.move("shigatoxin_fct", args.stx)
        shigatoxin_typing = openFileAsTable("shigatoxin_fc")
        log.write("\n\nShigatoxin Typer v2.0\n==============\n")
        log.write(_read_db_note("ShigatoxinTyping_db.txt"))

    sero_typing_o = []
    sero_typing_h = []
    if args.serotyping:
        # SEROTYPER
        subprocess.call("echo 'sseqid\tpident\tlength\tpositive' > serogroup_OH_fcd", shell=True)
        if args.input2:
            subprocess.call("sh " + q(scripts + "serotype.sh") + " " + q(TOOL_DIR) + " y " + q(args.input1) + " " + q(args.input2) + " " + q(args.contigs), shell=True)
        else:
            subprocess.call("sh " + q(scripts + "serotype.sh") + " " + q(TOOL_DIR) + " n " + q(args.input1) + " xxx " + q(args.contigs), shell=True)
        sero_typing_o = _collect_antigen_hits("O", args.antigen_O)
        sero_typing_h = _collect_antigen_hits("H", args.antigen_H)
        # No hit for either antigen: add a placeholder row to the combined table.
        if os.stat(args.antigen_O).st_size == 0 and os.stat(args.antigen_H).st_size == 0:
            subprocess.call("echo '-\t-\t-\t-' >> serogroup_OH_fcd", shell=True)
        log.write("\n\nSero Typer\n==============\n")
        log.write(_read_db_note("SeroTyping_db.txt"))

    if args.amrtyping:
        # AMRGENES (abricate with the resfinder database; an earlier
        # amrfinder-based invocation was disabled in favour of this one)
        subprocess.call("abricate --db resfinder " + q(args.contigs) + " > " + q(args.amrgenes), shell=True)
        log.write("\n\nAMR Typer\n==============\nabricate ")
        log.write(os.popen("abricate --version").read())
        log.write("\ndatabase version: ")
        log.write(os.popen("abricate --list | grep resfinder").read())

    return sequence_typing, shigatoxin_typing, sero_typing_o, sero_typing_h


def _write_report(args, report, sequence_typing, shigatoxin_typing, sero_typing_o, sero_typing_h):
    """Write the HTML report for the finished pipeline run to *report*."""
    # write head html
    insertFile(TOOL_DIR + "/report_head.html", report)
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    report.write("<td><h1>EURL VTEC WGS PT</h1><h2>Report for %s</h2>%s</td>" % (args.input1_name, timestamp))
    insertFile(TOOL_DIR + "/report_head2.html", report)

    # write results
    report.write("<h3>Summary</h3>\n")
    if args.serotyping:
        report.write("<p>Serotype: ")
        if len(sero_typing_o) == 0:
            report.write("O?")
        else:
            best_o = sero_typing_o[0][0]
            report.write("%s" % best_o[best_o.rfind("O"):])
        if len(sero_typing_h) == 0:
            report.write(":H?")
        else:
            best_h = sero_typing_h[0][0]
            report.write(":%s" % best_h[best_h.rfind("H"):])
        report.write("</p>\n")

    report.write("<p>Sequence type: ")
    # NOTE(review): the failure test looks at column 1 while the value
    # printed is column 0 - kept as in the original, confirm intent.
    if len(sequence_typing) < 2 or sequence_typing[1][1] == "-":
        report.write("Sequence typing failed")
    else:
        report.write("ST%s" % sequence_typing[1][0])
    report.write("</p>\n")

    if args.virulotyping:
        subprocess.call("sort " + shlex.quote(args.virulotyper) + " | awk '/eae_|stx1._|stx2._|ehxa_/ && $2>50 && !seen[substr($1, 1, index($1, \"_\")-2)]++ { printf(\"%s%s\",sep,substr($1, 1, index($1, \"_\")-1));sep=\", \" }END{print \"\"}' > virulotyper_rep", shell=True)
        # Collapse the a/b variant suffixes (e.g. "stx1a" -> "stx1") in one
        # pass. Rewriting the file with print() per line added an extra
        # newline on every pass.
        with open("virulotyper_rep") as rep_in:
            virulotypes = rep_in.read()
        for variant, collapsed in (("1a", "1"), ("2a", "2"), ("1b", "1"), ("2b", "2")):
            virulotypes = virulotypes.replace(variant, collapsed)
        with open("virulotyper_rep", "w") as rep_out:
            rep_out.write(virulotypes)
        report.write("<p>Virulotypes: ")
        insertFile("virulotyper_rep", report)
        report.write("</p>\n")

    if args.shigatoxintyping:
        report.write("<p>Stx Subtypes: ")
        if len(shigatoxin_typing) == 0:
            str_shigatoxin_subtype = "No subtype match found"
        else:
            # get corresponding subtypes
            shigatoxin_types = openFileAsTable(TOOL_DIR + "/data/stx_subtypes")
            str_shigatoxin_subtype = _summarise_stx_subtypes(shigatoxin_typing, shigatoxin_types)
        report.write("%s" % str_shigatoxin_subtype)
        report.write("</p>\n")

    # Quality Check: any "-" or "?" cell in the MLST table triggers the disclaimer.
    if any("-" in row for row in sequence_typing) or any("?" in row for row in sequence_typing):
        report.write("<p style='font-weight:bold;color:red'>Disclaimer: The data analysed do not fulfill minimum quality parameters, please consider repeating the sequencing.</p>\n")

    report.write("<hr/><h3>Raw data quality check</h3>\n")
    if args.input2:
        report.write("<p>FASTQC result forward: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html1_id))
        report.write("<p>FASTQC result reverse: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html2_id))
    else:
        report.write("<p>FASTQC result: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html1_id))
    if args.serotyping:
        report.write("<br/><hr/><h3>Serotyping</h3>\n")
        insertFileAsTable("serogroup_OH_fcd", report, True)
    report.write("<br/><hr/><h3>Multi Locus Sequence Typing</h3>\n")
    if len(sequence_typing) > 1:
        insertTable(sequence_typing, report, True)
    if args.virulotyping:
        report.write("<br/><hr/><h3>Virulotyping</h3>\n")
        report.write("<p>This table is filtered for results with >90%% gene coverage, unfiltered results can be found <a href='%s/datasets/%s/display/?preview=True'>here</a></p>\n" % (BASE_URL, args.virulotyper_id))
        insertFileAsTable("pathotyper_rep_tab", report, True, "table table-cross")
    if args.shigatoxintyping:
        report.write("<br/><hr/><h3>Shiga toxin typing</h3>\n")
        insertFileAsTable(args.stx, report, True)
    if args.amrtyping:
        report.write("<br/><hr/><h3>AMR typing</h3>\n")
        report.write("<p>AMR result: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.amr_id))
    # write tail html
    insertFile(TOOL_DIR + "/report_tail.html", report)


def __main__():
    """Command-line entry point: run the typing pipeline and write the report.

    Parses the command line, runs the analysis while logging tool versions
    and parameters to the ``--log`` file, then writes the HTML report to
    ``--output``. Both files are closed even when a step raises.
    """
    args = _build_parser().parse_args()
    with open(args.logfile, 'w') as log:
        sequence_typing, shigatoxin_typing, sero_typing_o, sero_typing_h = _run_pipeline(args, log)
    # REPORT
    with open(args.output, 'w') as report:
        _write_report(args, report, sequence_typing, shigatoxin_typing, sero_typing_o, sero_typing_h)
280 | |
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()