0
|
1 #!/usr/bin/env python3
|
|
2 from __future__ import division
|
|
3 import sys
|
|
4 import os
|
|
5 import time
|
|
6 import random
|
|
7 import re
|
|
8 from argparse import ArgumentParser
|
|
9 from tabulate import tabulate
|
|
10 import collections
|
|
11
|
|
12 from cgecore.blaster import Blaster
|
|
13 from cgecore.cgefinder import CGEFinder
|
|
14 from .output.table import TableResults
|
|
15
|
|
16
|
|
17 class ResFinder(CGEFinder):
|
|
18
|
|
19 def __init__(self, db_conf_file, notes, db_path, db_path_kma=None,
|
|
20 databases=None):
|
|
21 """
|
|
22 """
|
|
23 self.db_path = db_path
|
|
24
|
|
25 if(db_path_kma is None):
|
|
26 self.db_path_kma = db_path
|
|
27 else:
|
|
28 self.db_path_kma = db_path_kma
|
|
29
|
|
30 self.configured_dbs = dict()
|
|
31 self.kma_db_files = None
|
|
32 self.load_db_config(db_conf_file=db_conf_file)
|
|
33
|
|
34 self.databases = []
|
|
35 self.load_databases(databases=databases)
|
|
36
|
|
37 self.phenos = dict()
|
|
38 self.load_notes(notes=notes)
|
|
39
|
|
40 self.blast_results = None
|
|
41 # self.kma_results = None
|
|
42 # self.results = None
|
|
43
|
|
44 @staticmethod
|
|
45 def old_results_to_standard_output(result, software, version, run_date,
|
|
46 run_cmd, id, mutation=False,
|
|
47 tableresults=None):
|
|
48 """
|
|
49 """
|
|
50 std_results = TableResults(software, version, run_date, run_cmd, id)
|
|
51 headers = [
|
|
52 "template_name",
|
|
53 "template_length",
|
|
54 "template_start_pos",
|
|
55 "template_end_pos",
|
|
56 "aln_length",
|
|
57 "aln_identity",
|
|
58 "aln_gaps",
|
|
59 "aln_template_string",
|
|
60 "aln_query_string",
|
|
61 "aln_homology_string",
|
|
62 "template_variant",
|
|
63 "acc_no",
|
|
64 "query_id",
|
|
65 "query_start_pos",
|
|
66 "query_end_pos",
|
|
67 "query_depth",
|
|
68 "blast_eval",
|
|
69 "blast_bitscore",
|
|
70 "pval",
|
|
71 "software_score"
|
|
72 ]
|
|
73
|
|
74 for db_name, db in result.items():
|
|
75 if(db_name == "excluded"):
|
|
76 continue
|
|
77
|
|
78 if(db == "No hit found"):
|
|
79 continue
|
|
80
|
|
81 std_results.add_table(db_name)
|
|
82 std_db = std_results.long[db_name]
|
|
83 std_db.add_headers(headers)
|
|
84 std_db.lock_headers = True
|
|
85
|
|
86 for unique_id, hit_db in db.items():
|
|
87 if(unique_id in result["excluded"]):
|
|
88 continue
|
|
89 # TODO: unique_id == unique_db_id
|
|
90 sbjct = hit_db["sbjct_header"].split("_")
|
|
91 template = sbjct[0]
|
|
92 variant = sbjct[1]
|
|
93 acc = "_".join(sbjct[2:])
|
|
94 unique_db_id = ("{}_{}".format(template, acc))
|
|
95 std_db[unique_db_id] = {
|
|
96 "template_name": template,
|
|
97 "template_variant": variant,
|
|
98 "acc_no": acc,
|
|
99 "template_length": hit_db["sbjct_length"],
|
|
100 "template_start_pos": hit_db["sbjct_start"],
|
|
101 "template_end_pos": hit_db["sbjct_end"],
|
|
102 "aln_length": hit_db["HSP_length"],
|
|
103 "aln_identity": hit_db["perc_ident"],
|
|
104 "aln_gaps": hit_db["gaps"],
|
|
105 "aln_template_string": hit_db["sbjct_string"],
|
|
106 "aln_query_string": hit_db["query_string"],
|
|
107 "aln_homology_string": hit_db["homo_string"],
|
|
108 "query_id": hit_db["contig_name"],
|
|
109 "query_start_pos": hit_db["query_start"],
|
|
110 "query_end_pos": hit_db["query_end"],
|
|
111 "query_depth": hit_db.get("query_depth", "NA"),
|
|
112 "blast_eval": hit_db.get("evalue", "NA"),
|
|
113 "blast_bitscore": hit_db.get("bit", "NA"),
|
|
114 "pval": hit_db.get("p_value", "NA"),
|
|
115 "software_score": hit_db["cal_score"]
|
|
116 }
|
|
117
|
|
118 return std_results
|
|
119
|
|
120 def write_results(self, out_path, result, res_type, software="ResFinder"):
|
|
121 """
|
|
122 """
|
|
123 if(res_type == ResFinder.TYPE_BLAST):
|
|
124 result_str = self.results_to_str(
|
|
125 res_type=res_type, results=result.results,
|
|
126 query_align=result.gene_align_query,
|
|
127 homo_align=result.gene_align_homo,
|
|
128 sbjct_align=result.gene_align_sbjct)
|
|
129 elif(res_type == ResFinder.TYPE_KMA):
|
|
130 result_str = self.results_to_str(res_type=res_type,
|
|
131 results=result)
|
|
132
|
|
133 with open(out_path + "/{}_results_tab.txt".format(software), "w") as fh:
|
|
134 fh.write(result_str[0])
|
|
135 with open(out_path + "/{}_results_table.txt".format(software), "w") as fh:
|
|
136 fh.write(result_str[1])
|
|
137 with open(out_path + "/{}_results.txt".format(software), "w") as fh:
|
|
138 fh.write(result_str[2])
|
|
139 with open(out_path + "/{}_Resistance_gene_seq.fsa".format(software), "w") as fh:
|
|
140 fh.write(result_str[3])
|
|
141 with open(out_path + "/{}_Hit_in_genome_seq.fsa".format(software), "w") as fh:
|
|
142 fh.write(result_str[4])
|
|
143
|
|
144 def blast(self, inputfile, out_path, min_cov=0.9, threshold=0.6,
|
|
145 blast="blastn", allowed_overlap=0):
|
|
146 """
|
|
147 """
|
|
148 blast_run = Blaster(inputfile=inputfile, databases=self.databases,
|
|
149 db_path=self.db_path, out_path=out_path,
|
|
150 min_cov=min_cov, threshold=threshold, blast=blast,
|
|
151 allowed_overlap=allowed_overlap)
|
|
152 self.blast_results = blast_run.results
|
|
153 return blast_run
|
|
154
|
|
    def results_to_str(self, res_type, results, query_align=None,
                       homo_align=None, sbjct_align=None):
        """Format hit results into the five output strings.

        :param res_type: ResFinder.TYPE_BLAST or ResFinder.TYPE_KMA;
            anything else terminates the program.
        :param results: mapping of db name -> {hit -> hit dict}, plus an
            "excluded" entry listing hits to skip.
        :param query_align/homo_align/sbjct_align: per-db alignment
            strings; only used for TYPE_BLAST (KMA carries the strings
            inside each hit dict).
        :return: tuple (tab_str, table_str, txt_str, ref_str, hit_str) —
            tab-separated table, human-readable table, txt report with
            alignments, reference FASTA, hit FASTA.
        """

        if(res_type != ResFinder.TYPE_BLAST
           and res_type != ResFinder.TYPE_KMA):
            sys.exit("resfinder.py error: result method was not provided or "
                     "not recognized.")

        # Write the header for the tab file
        tab_str = ("Resistance gene\tIdentity\tAlignment Length/Gene Length\t"
                   "Coverage\tPosition in reference\tContig\t"
                   "Position in contig\tPhenotype\tAccession no.\n")

        table_str = ""
        txt_str = ""
        ref_str = ""
        hit_str = ""

        # Getting and writing out the results
        titles = dict()
        rows = dict()
        headers = dict()
        txt_file_seq_text = dict()
        # Collects fragments of hits split across contigs, keyed by the
        # full sbjct_header.
        # NOTE(review): split_print is never cleared between databases,
        # so fragments from one db appear to be re-processed for every
        # later db with hits — confirm whether this is intended.
        split_print = collections.defaultdict(list)

        for db in results:
            if(db == "excluded"):
                continue

            # Clean up dbs with only excluded hits
            no_hits = True
            for hit in results[db]:
                if(hit not in results["excluded"]):
                    no_hits = False
                    break
            if(no_hits):
                results[db] = "No hit found"

            profile = str(self.configured_dbs[db][0])
            if results[db] == "No hit found":
                table_str += ("%s\n%s\n\n" % (profile, results[db]))
            else:
                titles[db] = "%s" % (profile)
                headers[db] = ["Resistance gene", "Identity",
                               "Alignment Length/Gene Length", "Coverage",
                               "Position in reference", "Contig",
                               "Position in contig", "Phenotype",
                               "Accession no."]
                table_str += ("%s\n" % (profile))
                table_str += ("Resistance gene\tIdentity\t"
                              "Alignment Length/Gene Length\tCoverage\t"
                              "Position in reference\tContig\t"
                              "Position in contig\tPhenotype\t"
                              "Accession no.\n")

                rows[db] = list()
                txt_file_seq_text[db] = list()

                for hit in results[db]:
                    if(hit in results["excluded"]):
                        continue

                    # sbjct_header: <gene>_<variant>_<accession...>
                    res_header = results[db][hit]["sbjct_header"]
                    tmp = res_header.split("_")
                    gene = tmp[0]
                    acc = "_".join(tmp[2:])
                    ID = results[db][hit]["perc_ident"]
                    coverage = results[db][hit]["perc_coverage"]
                    sbjt_length = results[db][hit]["sbjct_length"]
                    HSP = results[db][hit]["HSP_length"]
                    positions_contig = "{}..{}".format(
                        results[db][hit]["query_start"],
                        results[db][hit]["query_end"])
                    positions_ref = "{}..{}".format(
                        results[db][hit]["sbjct_start"],
                        results[db][hit]["sbjct_end"])
                    contig_name = results[db][hit]["contig_name"]
                    # Genes absent from notes.txt get a curator warning
                    # instead of a phenotype.
                    pheno = self.phenos.get(gene, ("Warning: gene is missing "
                                                   "from Notes file. Please "
                                                   "inform curator."))

                    pheno = pheno.strip()

                    if "split_length" in results[db][hit]:
                        # Hit is split over several contigs: defer the
                        # merged table row to the split_print pass below,
                        # but emit the tab line now.
                        total_HSP = results[db][hit]["split_length"]
                        split_print[res_header].append([gene, ID, total_HSP,
                                                        sbjt_length, coverage,
                                                        positions_ref,
                                                        contig_name,
                                                        positions_contig,
                                                        pheno, acc])
                        tab_str += ("%s\t%s\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
                                    % (gene, ID, HSP, sbjt_length, coverage,
                                       positions_ref, contig_name,
                                       positions_contig, pheno, acc)
                                    )
                    else:
                        # Write tabels
                        table_str += ("%s\t%.2f\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s"
                                      "\n" % (gene, ID, HSP, sbjt_length,
                                              coverage, positions_ref,
                                              contig_name, positions_contig,
                                              pheno, acc)
                                      )
                        tab_str += ("%s\t%.2f\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
                                    % (gene, ID, HSP, sbjt_length, coverage,
                                       positions_ref, contig_name,
                                       positions_contig, pheno, acc)
                                    )

                        # Saving the output to write the txt result table
                        hsp_length = "%s/%s" % (HSP, sbjt_length)
                        rows[db].append([gene, ID, hsp_length, coverage,
                                         positions_ref, contig_name,
                                         positions_contig, pheno, acc])

                    # Writing subjet/ref sequence
                    if(res_type == ResFinder.TYPE_BLAST):
                        ref_seq = sbjct_align[db][hit]
                    elif(res_type == ResFinder.TYPE_KMA):
                        ref_seq = results[db][hit]["sbjct_string"]

                    # FASTA-wrap the reference sequence at 60 columns.
                    ref_str += (">%s_%s\n" % (gene, acc))
                    for i in range(0, len(ref_seq), 60):
                        ref_str += ("%s\n" % (ref_seq[i:i + 60]))

                    # Getting the header and text for the txt file output
                    sbjct_start = results[db][hit]["sbjct_start"]
                    sbjct_end = results[db][hit]["sbjct_end"]
                    text = ("%s, ID: %.2f %%, "
                            "Alignment Length/Gene Length: %s/%s, "
                            "Coverage: %s, "
                            "Positions in reference: %s..%s, Contig name: %s, "
                            "Position: %s" % (gene, ID, HSP, sbjt_length,
                                              coverage, sbjct_start, sbjct_end,
                                              contig_name, positions_contig))
                    hit_str += (">%s\n" % text)

                    # Writing query/hit sequence
                    if(res_type == ResFinder.TYPE_BLAST):
                        hit_seq = query_align[db][hit]
                    elif(res_type == ResFinder.TYPE_KMA):
                        hit_seq = results[db][hit]["query_string"]

                    for i in range(0, len(hit_seq), 60):
                        hit_str += ("%s\n" % (hit_seq[i:i + 60]))

                    # Saving the output to print the txt result file allignemts
                    if(res_type == ResFinder.TYPE_BLAST):
                        txt_file_seq_text[db].append((text, ref_seq,
                                                      homo_align[db][hit],
                                                      hit_seq))
                    elif(res_type == ResFinder.TYPE_KMA):
                        txt_file_seq_text[db].append(
                            (text, ref_seq, results[db][hit]["homo_string"],
                             hit_seq))

                # Merge the deferred split hits: fields of the first
                # fragment seed the row, the remaining fragments append
                # their identity/positions/contig as comma lists.
                for res in split_print:
                    gene = split_print[res][0][0]
                    ID = split_print[res][0][1]
                    HSP = split_print[res][0][2]
                    sbjt_length = split_print[res][0][3]
                    coverage = split_print[res][0][4]
                    positions_ref = split_print[res][0][5]
                    contig_name = split_print[res][0][6]
                    positions_contig = split_print[res][0][7]
                    pheno = split_print[res][0][8]
                    acc = split_print[res][0][9]

                    # NOTE(review): total_coverage is accumulated but never
                    # used; the row reports the first fragment's coverage.
                    total_coverage = 0

                    for i in range(1, len(split_print[res])):
                        ID = "%s, %.2f" % (ID, split_print[res][i][1])
                        total_coverage += split_print[res][i][4]
                        positions_ref = (positions_ref + ", "
                                         + split_print[res][i][5])
                        contig_name = (contig_name + ", "
                                       + split_print[res][i][6])
                        positions_contig = (positions_contig + ", "
                                            + split_print[res][i][7])

                    table_str += ("%s\t%s\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
                                  % (gene, ID, HSP, sbjt_length, coverage,
                                     positions_ref, contig_name,
                                     positions_contig, pheno, acc)
                                  )

                    hsp_length = "%s/%s" % (HSP, sbjt_length)

                    rows[db].append([gene, ID, hsp_length, coverage,
                                     positions_ref, contig_name,
                                     positions_contig, pheno, acc])

                table_str += ("\n")

        # Writing table txt for all hits
        for db in titles:
            # Txt file table
            table = ResFinder.text_table(titles[db], headers[db], rows[db])
            txt_str += table

        # Writing alignment txt for all hits
        for db in titles:
            # Txt file alignments
            txt_str += ("##################### %s #####################\n"
                        % (db))
            for text in txt_file_seq_text[db]:
                txt_str += ("%s\n\n" % (text[0]))
                # Interleave reference / homology / query lines in
                # 60-column chunks.
                for i in range(0, len(text[1]), 60):
                    txt_str += ("%s\n" % (text[1][i:i + 60]))
                    txt_str += ("%s\n" % (text[2][i:i + 60]))
                    txt_str += ("%s\n\n" % (text[3][i:i + 60]))
                txt_str += ("\n")

        return (tab_str, table_str, txt_str, ref_str, hit_str)
|
|
370
|
|
371 @staticmethod
|
|
372 def text_table(title, headers, rows, table_format='psql'):
|
|
373 ''' Create text table
|
|
374
|
|
375 USAGE:
|
|
376 >>> from tabulate import tabulate
|
|
377 >>> title = 'My Title'
|
|
378 >>> headers = ['A','B']
|
|
379 >>> rows = [[1,2],[3,4]]
|
|
380 >>> print(text_table(title, headers, rows))
|
|
381 +-----------+
|
|
382 | My Title |
|
|
383 +-----+-----+
|
|
384 | A | B |
|
|
385 +=====+=====+
|
|
386 | 1 | 2 |
|
|
387 | 3 | 4 |
|
|
388 +-----+-----+
|
|
389 '''
|
|
390 # Create table
|
|
391 table = tabulate(rows, headers, tablefmt=table_format)
|
|
392 # Prepare title injection
|
|
393 width = len(table.split('\n')[0])
|
|
394 tlen = len(title)
|
|
395 if tlen + 4 > width:
|
|
396 # Truncate oversized titles
|
|
397 tlen = width - 4
|
|
398 title = title[:tlen]
|
|
399 spaces = width - 2 - tlen
|
|
400 left_spacer = ' ' * int(spaces / 2)
|
|
401 right_spacer = ' ' * (spaces - len(left_spacer))
|
|
402 # Update table with title
|
|
403 table = '\n'.join(['+%s+' % ('-' * (width - 2)),
|
|
404 '|%s%s%s|' % (left_spacer,
|
|
405 title, right_spacer),
|
|
406 table, '\n'])
|
|
407 return table
|
|
408
|
|
409 def load_notes(self, notes):
|
|
410 with open(notes, 'r') as f:
|
|
411 for line in f:
|
|
412 line = line.strip()
|
|
413 if line.startswith("#"):
|
|
414 continue
|
|
415 else:
|
|
416 tmp = line.split(":")
|
|
417
|
|
418 self.phenos[tmp[0]] = "%s %s" % (tmp[1], tmp[2])
|
|
419
|
|
420 if(tmp[2].startswith("Alternate name; ")):
|
|
421 self.phenos[tmp[2][16:]] = "%s %s" % (tmp[1], tmp[2])
|
|
422
|
|
423 def load_databases(self, databases):
|
|
424 """
|
|
425 """
|
|
426 # Check if databases and config file are correct/correponds
|
|
427 if databases == '':
|
|
428 sys.exit("Input Error: No database was specified!\n")
|
|
429 elif databases is None:
|
|
430 # Choose all available databases from the config file
|
|
431 self.databases = self.configured_dbs.keys()
|
|
432 else:
|
|
433 # Handle multiple databases
|
|
434 databases = databases.split(',')
|
|
435 # Check that the ResFinder DBs are valid
|
|
436 for db_prefix in databases:
|
|
437 if db_prefix in self.configured_dbs:
|
|
438 self.databases.append(db_prefix)
|
|
439 else:
|
|
440 sys.exit("Input Error: Provided database was not "
|
|
441 "recognised! (%s)\n" % db_prefix)
|
|
442
|
|
    def load_db_config(self, db_conf_file):
        """Parse the database config file into self.configured_dbs.

        Config format: '#' lines are comments, except a comment
        containing 'extensions:' which lists the file extensions every
        database must provide. Data lines are 3 tab-separated columns:
        db_prefix, name, description. Exits on any malformed line,
        missing database file, or an empty config.

        :param db_conf_file: path to the config file.
        """
        extensions = []
        with open(db_conf_file) as f:
            for line in f:
                line = line.strip()

                if not line:
                    continue

                if line[0] == '#':
                    # "# extensions: fsa, ..." declares which files each
                    # db prefix must have on disk.
                    if 'extensions:' in line:
                        extensions = [s.strip()
                                      for s in line.split('extensions:')[-1]
                                      .split(',')]
                    continue

                tmp = line.split('\t')
                if len(tmp) != 3:
                    sys.exit(("Input Error: Invalid line in the database"
                              " config file!\nA proper entry requires 3 tab "
                              "separated columns!\n%s") % (line))

                db_prefix = tmp[0].strip()
                # Allow trailing '#' comments after the name column.
                name = tmp[1].split('#')[0].strip()
                description = tmp[2]  # NOTE(review): parsed but unused.

                # Check if all db files are present
                for ext in extensions:
                    db = "%s/%s.%s" % (self.db_path, db_prefix, ext)
                    if not os.path.exists(db):
                        sys.exit(("Input Error: The database file (%s) "
                                  "could not be found!") % (db))

                if db_prefix not in self.configured_dbs:
                    self.configured_dbs[db_prefix] = []
                self.configured_dbs[db_prefix].append(name)

        if len(self.configured_dbs) == 0:
            sys.exit("Input Error: No databases were found in the "
                     "database config file!")

        # Loading paths for KMA databases.
        # NOTE(review): self.kma_db_files is reassigned on every loop
        # iteration, so after the loop it only holds the files of the
        # last prefix iterated — confirm this is intended.
        # NOTE(review): kma_db concatenates db_path_kma and the prefix
        # with no path separator; presumably db_path_kma ends with '/'
        # — verify against callers.
        for drug in self.configured_dbs:
            kma_db = self.db_path_kma + drug
            self.kma_db_files = [kma_db + ".b", kma_db + ".length.b",
                                 kma_db + ".name.b", kma_db + ".align.b"]
|
|
491
|
|
492
|
|
if __name__ == '__main__':

    ##########################################################################
    # PARSE COMMAND LINE OPTIONS
    ##########################################################################

    parser = ArgumentParser()
    parser.add_argument("-i", "--inputfile",
                        dest="inputfile",
                        help="Input file",
                        default=None)
    parser.add_argument("-1", "--fastq1",
                        help="Raw read data file 1.",
                        default=None)
    parser.add_argument("-2", "--fastq2",
                        help="Raw read data file 2 (only required if \
                              data is paired-end).",
                        default=None)
    parser.add_argument("-o", "--outputPath",
                        dest="out_path",
                        help="Path to blast output",
                        default='')

    parser.add_argument("-b", "--blastPath",
                        dest="blast_path",
                        help="Path to blast",
                        default='blastn')
    parser.add_argument("-p", "--databasePath",
                        dest="db_path",
                        help="Path to the databases",
                        default='')

    # NOTE(review): --kmaPath is parsed but never referenced below;
    # presumably the kma executable path is resolved inside finder.kma —
    # verify.
    parser.add_argument("-k", "--kmaPath",
                        dest="kma_path",
                        help="Path to KMA",
                        default=None)
    parser.add_argument("-q", "--databasePathKMA",
                        dest="db_path_kma",
                        help="Path to the directories containing the \
                              KMA indexed databases. Defaults to the \
                              directory 'kma_indexing' inside the \
                              databasePath directory.",
                        default=None)

    parser.add_argument("-d", "--databases",
                        dest="databases",
                        help="Databases chosen to search in - if none \
                              are specified all are used",
                        default=None)
    # NOTE(review): no type=float on -l/-t, so CLI-supplied values reach
    # the finder as strings (the defaults stay floats) — confirm the
    # downstream tools accept both.
    parser.add_argument("-l", "--min_cov",
                        dest="min_cov",
                        help="Minimum coverage",
                        default=0.60)
    parser.add_argument("-t", "--threshold",
                        dest="threshold",
                        help="Blast threshold for identity",
                        default=0.90)
    parser.add_argument("-nano", "--nanopore",
                        action="store_true",
                        dest="nanopore",
                        help="If nanopore data is used",
                        default=False)
    args = parser.parse_args()

    ##########################################################################
    # MAIN
    ##########################################################################

    # Defining varibales

    min_cov = args.min_cov
    threshold = args.threshold

    # Check if valid database is provided
    if args.db_path is None:
        sys.exit("Input Error: No database directory was provided!\n")
    elif not os.path.exists(args.db_path):
        sys.exit("Input Error: The specified database directory does not "
                 "exist!\n")
    else:
        # Check existence of config file
        db_config_file = '%s/config' % (args.db_path)
        if not os.path.exists(db_config_file):
            sys.exit("Input Error: The database config file could not be "
                     "found!")
        # Save path
        db_path = args.db_path

    # Check existence of notes file
    notes_path = "%s/notes.txt" % (args.db_path)
    if not os.path.exists(notes_path):
        sys.exit('Input Error: notes.txt not found! (%s)' % (notes_path))

    # Check for input: either an assembly or raw reads must be given.
    if args.inputfile is None and args.fastq1 is None:
        sys.exit("Input Error: No Input were provided!\n")

    # Check if valid input file for assembly is provided
    if args.inputfile:
        if not os.path.exists(args.inputfile):
            sys.exit("Input Error: Input file does not exist!\n")
        else:
            inputfile = args.inputfile
    else:
        inputfile = None

    # Check if valid input files for raw data
    if args.fastq1:

        if not os.path.exists(args.fastq1):
            sys.exit("Input Error: fastq1 file does not exist!\n")
        else:
            input_fastq1 = args.fastq1

        # fastq2 is optional (single-end data).
        if args.fastq2:
            if not os.path.exists(args.fastq2):
                sys.exit("Input Error: fastq2 file does not exist!\n")
            else:
                input_fastq2 = args.fastq2
        else:
            input_fastq2 = None
    else:
        input_fastq1 = None
        input_fastq2 = None

    # Check if valid output directory is provided
    if not os.path.exists(args.out_path):
        sys.exit("Input Error: Output dirctory does not exists!\n")
    else:
        out_path = args.out_path

    # If input is an assembly, then use BLAST
    if(inputfile is not None):
        finder = ResFinder(db_conf_file=db_config_file,
                           databases=args.databases,
                           db_path=db_path, notes=notes_path)

        blast_run = finder.blast(inputfile=inputfile, out_path=out_path,
                                 min_cov=min_cov, threshold=threshold,
                                 blast=args.blast_path)

        finder.write_results(out_path=out_path, result=blast_run,
                             res_type=ResFinder.TYPE_BLAST)

    # If the input is raw read data, then use KMA
    elif(input_fastq1 is not None):
        finder = ResFinder(db_conf_file=db_config_file,
                           databases=args.databases,
                           db_path=db_path, db_path_kma=args.db_path_kma,
                           notes=notes_path)

        # if input_fastq2 is None, it is ignored by the kma method.
        if args.nanopore:
            # Nanopore reads: pass ONT preset and min depth 5 to KMA.
            kma_run = finder.kma(inputfile_1=input_fastq1,
                                 inputfile_2=input_fastq2, kma_add_args='-ont -md 5')
        else:
            kma_run = finder.kma(inputfile_1=input_fastq1,
                                 inputfile_2=input_fastq2)

        finder.write_results(out_path=out_path, result=kma_run,
                             res_type=ResFinder.TYPE_KMA)
|