annotate resfinder/cge/resfinder.py @ 0:55051a9bc58d draft default tip

Uploaded
author dcouvin
date Mon, 10 Jan 2022 20:06:07 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
1 #!/usr/bin/env python3
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
2 from __future__ import division
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
3 import sys
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
4 import os
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
5 import time
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
6 import random
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
7 import re
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
8 from argparse import ArgumentParser
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
9 from tabulate import tabulate
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
10 import collections
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
11
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
12 from cgecore.blaster import Blaster
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
13 from cgecore.cgefinder import CGEFinder
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
14 from .output.table import TableResults
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
15
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
16
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
17 class ResFinder(CGEFinder):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
18
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
19 def __init__(self, db_conf_file, notes, db_path, db_path_kma=None,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
20 databases=None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
21 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
22 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
23 self.db_path = db_path
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
24
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
25 if(db_path_kma is None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
26 self.db_path_kma = db_path
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
27 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
28 self.db_path_kma = db_path_kma
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
29
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
30 self.configured_dbs = dict()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
31 self.kma_db_files = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
32 self.load_db_config(db_conf_file=db_conf_file)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
33
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
34 self.databases = []
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
35 self.load_databases(databases=databases)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
36
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
37 self.phenos = dict()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
38 self.load_notes(notes=notes)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
39
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
40 self.blast_results = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
41 # self.kma_results = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
42 # self.results = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
43
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
44 @staticmethod
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
45 def old_results_to_standard_output(result, software, version, run_date,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
46 run_cmd, id, mutation=False,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
47 tableresults=None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
48 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
49 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
50 std_results = TableResults(software, version, run_date, run_cmd, id)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
51 headers = [
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
52 "template_name",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
53 "template_length",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
54 "template_start_pos",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
55 "template_end_pos",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
56 "aln_length",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
57 "aln_identity",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
58 "aln_gaps",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
59 "aln_template_string",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
60 "aln_query_string",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
61 "aln_homology_string",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
62 "template_variant",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
63 "acc_no",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
64 "query_id",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
65 "query_start_pos",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
66 "query_end_pos",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
67 "query_depth",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
68 "blast_eval",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
69 "blast_bitscore",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
70 "pval",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
71 "software_score"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
72 ]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
73
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
74 for db_name, db in result.items():
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
75 if(db_name == "excluded"):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
76 continue
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
77
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
78 if(db == "No hit found"):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
79 continue
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
80
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
81 std_results.add_table(db_name)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
82 std_db = std_results.long[db_name]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
83 std_db.add_headers(headers)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
84 std_db.lock_headers = True
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
85
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
86 for unique_id, hit_db in db.items():
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
87 if(unique_id in result["excluded"]):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
88 continue
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
89 # TODO: unique_id == unique_db_id
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
90 sbjct = hit_db["sbjct_header"].split("_")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
91 template = sbjct[0]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
92 variant = sbjct[1]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
93 acc = "_".join(sbjct[2:])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
94 unique_db_id = ("{}_{}".format(template, acc))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
95 std_db[unique_db_id] = {
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
96 "template_name": template,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
97 "template_variant": variant,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
98 "acc_no": acc,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
99 "template_length": hit_db["sbjct_length"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
100 "template_start_pos": hit_db["sbjct_start"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
101 "template_end_pos": hit_db["sbjct_end"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
102 "aln_length": hit_db["HSP_length"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
103 "aln_identity": hit_db["perc_ident"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
104 "aln_gaps": hit_db["gaps"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
105 "aln_template_string": hit_db["sbjct_string"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
106 "aln_query_string": hit_db["query_string"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
107 "aln_homology_string": hit_db["homo_string"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
108 "query_id": hit_db["contig_name"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
109 "query_start_pos": hit_db["query_start"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
110 "query_end_pos": hit_db["query_end"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
111 "query_depth": hit_db.get("query_depth", "NA"),
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
112 "blast_eval": hit_db.get("evalue", "NA"),
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
113 "blast_bitscore": hit_db.get("bit", "NA"),
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
114 "pval": hit_db.get("p_value", "NA"),
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
115 "software_score": hit_db["cal_score"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
116 }
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
117
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
118 return std_results
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
119
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
120 def write_results(self, out_path, result, res_type, software="ResFinder"):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
121 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
122 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
123 if(res_type == ResFinder.TYPE_BLAST):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
124 result_str = self.results_to_str(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
125 res_type=res_type, results=result.results,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
126 query_align=result.gene_align_query,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
127 homo_align=result.gene_align_homo,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
128 sbjct_align=result.gene_align_sbjct)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
129 elif(res_type == ResFinder.TYPE_KMA):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
130 result_str = self.results_to_str(res_type=res_type,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
131 results=result)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
132
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
133 with open(out_path + "/{}_results_tab.txt".format(software), "w") as fh:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
134 fh.write(result_str[0])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
135 with open(out_path + "/{}_results_table.txt".format(software), "w") as fh:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
136 fh.write(result_str[1])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
137 with open(out_path + "/{}_results.txt".format(software), "w") as fh:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
138 fh.write(result_str[2])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
139 with open(out_path + "/{}_Resistance_gene_seq.fsa".format(software), "w") as fh:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
140 fh.write(result_str[3])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
141 with open(out_path + "/{}_Hit_in_genome_seq.fsa".format(software), "w") as fh:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
142 fh.write(result_str[4])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
143
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
144 def blast(self, inputfile, out_path, min_cov=0.9, threshold=0.6,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
145 blast="blastn", allowed_overlap=0):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
146 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
147 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
148 blast_run = Blaster(inputfile=inputfile, databases=self.databases,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
149 db_path=self.db_path, out_path=out_path,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
150 min_cov=min_cov, threshold=threshold, blast=blast,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
151 allowed_overlap=allowed_overlap)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
152 self.blast_results = blast_run.results
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
153 return blast_run
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
154
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
155 def results_to_str(self, res_type, results, query_align=None,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
156 homo_align=None, sbjct_align=None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
157
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
158 if(res_type != ResFinder.TYPE_BLAST
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
159 and res_type != ResFinder.TYPE_KMA):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
160 sys.exit("resfinder.py error: result method was not provided or "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
161 "not recognized.")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
162
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
163 # Write the header for the tab file
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
164 tab_str = ("Resistance gene\tIdentity\tAlignment Length/Gene Length\t"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
165 "Coverage\tPosition in reference\tContig\t"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
166 "Position in contig\tPhenotype\tAccession no.\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
167
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
168 table_str = ""
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
169 txt_str = ""
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
170 ref_str = ""
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
171 hit_str = ""
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
172
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
173 # Getting and writing out the results
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
174 titles = dict()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
175 rows = dict()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
176 headers = dict()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
177 txt_file_seq_text = dict()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
178 split_print = collections.defaultdict(list)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
179
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
180 for db in results:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
181 if(db == "excluded"):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
182 continue
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
183
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
184 # Clean up dbs with only excluded hits
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
185 no_hits = True
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
186 for hit in results[db]:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
187 if(hit not in results["excluded"]):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
188 no_hits = False
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
189 break
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
190 if(no_hits):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
191 results[db] = "No hit found"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
192
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
193 profile = str(self.configured_dbs[db][0])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
194 if results[db] == "No hit found":
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
195 table_str += ("%s\n%s\n\n" % (profile, results[db]))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
196 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
197 titles[db] = "%s" % (profile)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
198 headers[db] = ["Resistance gene", "Identity",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
199 "Alignment Length/Gene Length", "Coverage",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
200 "Position in reference", "Contig",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
201 "Position in contig", "Phenotype",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
202 "Accession no."]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
203 table_str += ("%s\n" % (profile))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
204 table_str += ("Resistance gene\tIdentity\t"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
205 "Alignment Length/Gene Length\tCoverage\t"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
206 "Position in reference\tContig\t"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
207 "Position in contig\tPhenotype\t"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
208 "Accession no.\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
209
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
210 rows[db] = list()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
211 txt_file_seq_text[db] = list()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
212
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
213 for hit in results[db]:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
214 if(hit in results["excluded"]):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
215 continue
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
216
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
217 res_header = results[db][hit]["sbjct_header"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
218 tmp = res_header.split("_")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
219 gene = tmp[0]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
220 acc = "_".join(tmp[2:])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
221 ID = results[db][hit]["perc_ident"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
222 coverage = results[db][hit]["perc_coverage"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
223 sbjt_length = results[db][hit]["sbjct_length"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
224 HSP = results[db][hit]["HSP_length"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
225 positions_contig = "{}..{}".format(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
226 results[db][hit]["query_start"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
227 results[db][hit]["query_end"])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
228 positions_ref = "{}..{}".format(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
229 results[db][hit]["sbjct_start"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
230 results[db][hit]["sbjct_end"])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
231 contig_name = results[db][hit]["contig_name"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
232 pheno = self.phenos.get(gene, ("Warning: gene is missing "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
233 "from Notes file. Please "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
234 "inform curator."))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
235
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
236 pheno = pheno.strip()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
237
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
238 if "split_length" in results[db][hit]:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
239 total_HSP = results[db][hit]["split_length"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
240 split_print[res_header].append([gene, ID, total_HSP,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
241 sbjt_length, coverage,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
242 positions_ref,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
243 contig_name,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
244 positions_contig,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
245 pheno, acc])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
246 tab_str += ("%s\t%s\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
247 % (gene, ID, HSP, sbjt_length, coverage,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
248 positions_ref, contig_name,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
249 positions_contig, pheno, acc)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
250 )
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
251 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
252 # Write tabels
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
253 table_str += ("%s\t%.2f\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
254 "\n" % (gene, ID, HSP, sbjt_length,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
255 coverage, positions_ref,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
256 contig_name, positions_contig,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
257 pheno, acc)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
258 )
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
259 tab_str += ("%s\t%.2f\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
260 % (gene, ID, HSP, sbjt_length, coverage,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
261 positions_ref, contig_name,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
262 positions_contig, pheno, acc)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
263 )
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
264
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
265 # Saving the output to write the txt result table
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
266 hsp_length = "%s/%s" % (HSP, sbjt_length)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
267 rows[db].append([gene, ID, hsp_length, coverage,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
268 positions_ref, contig_name,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
269 positions_contig, pheno, acc])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
270
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
271 # Writing subjet/ref sequence
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
272 if(res_type == ResFinder.TYPE_BLAST):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
273 ref_seq = sbjct_align[db][hit]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
274 elif(res_type == ResFinder.TYPE_KMA):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
275 ref_seq = results[db][hit]["sbjct_string"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
276
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
277 ref_str += (">%s_%s\n" % (gene, acc))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
278 for i in range(0, len(ref_seq), 60):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
279 ref_str += ("%s\n" % (ref_seq[i:i + 60]))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
280
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
281 # Getting the header and text for the txt file output
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
282 sbjct_start = results[db][hit]["sbjct_start"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
283 sbjct_end = results[db][hit]["sbjct_end"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
284 text = ("%s, ID: %.2f %%, "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
285 "Alignment Length/Gene Length: %s/%s, "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
286 "Coverage: %s, "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
287 "Positions in reference: %s..%s, Contig name: %s, "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
288 "Position: %s" % (gene, ID, HSP, sbjt_length,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
289 coverage, sbjct_start, sbjct_end,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
290 contig_name, positions_contig))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
291 hit_str += (">%s\n" % text)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
292
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
293 # Writing query/hit sequence
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
294 if(res_type == ResFinder.TYPE_BLAST):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
295 hit_seq = query_align[db][hit]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
296 elif(res_type == ResFinder.TYPE_KMA):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
297 hit_seq = results[db][hit]["query_string"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
298
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
299 for i in range(0, len(hit_seq), 60):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
300 hit_str += ("%s\n" % (hit_seq[i:i + 60]))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
301
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
302 # Saving the output to print the txt result file allignemts
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
303 if(res_type == ResFinder.TYPE_BLAST):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
304 txt_file_seq_text[db].append((text, ref_seq,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
305 homo_align[db][hit],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
306 hit_seq))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
307 elif(res_type == ResFinder.TYPE_KMA):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
308 txt_file_seq_text[db].append(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
309 (text, ref_seq, results[db][hit]["homo_string"],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
310 hit_seq))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
311
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
312 for res in split_print:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
313 gene = split_print[res][0][0]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
314 ID = split_print[res][0][1]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
315 HSP = split_print[res][0][2]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
316 sbjt_length = split_print[res][0][3]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
317 coverage = split_print[res][0][4]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
318 positions_ref = split_print[res][0][5]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
319 contig_name = split_print[res][0][6]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
320 positions_contig = split_print[res][0][7]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
321 pheno = split_print[res][0][8]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
322 acc = split_print[res][0][9]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
323
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
324 total_coverage = 0
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
325
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
326 for i in range(1, len(split_print[res])):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
327 ID = "%s, %.2f" % (ID, split_print[res][i][1])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
328 total_coverage += split_print[res][i][4]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
329 positions_ref = (positions_ref + ", "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
330 + split_print[res][i][5])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
331 contig_name = (contig_name + ", "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
332 + split_print[res][i][6])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
333 positions_contig = (positions_contig + ", "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
334 + split_print[res][i][7])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
335
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
336 table_str += ("%s\t%s\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
337 % (gene, ID, HSP, sbjt_length, coverage,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
338 positions_ref, contig_name,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
339 positions_contig, pheno, acc)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
340 )
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
341
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
342 hsp_length = "%s/%s" % (HSP, sbjt_length)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
343
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
344 rows[db].append([gene, ID, hsp_length, coverage,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
345 positions_ref, contig_name,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
346 positions_contig, pheno, acc])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
347
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
348 table_str += ("\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
349
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
350 # Writing table txt for all hits
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
351 for db in titles:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
352 # Txt file table
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
353 table = ResFinder.text_table(titles[db], headers[db], rows[db])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
354 txt_str += table
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
355
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
356 # Writing alignment txt for all hits
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
357 for db in titles:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
358 # Txt file alignments
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
359 txt_str += ("##################### %s #####################\n"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
360 % (db))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
361 for text in txt_file_seq_text[db]:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
362 txt_str += ("%s\n\n" % (text[0]))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
363 for i in range(0, len(text[1]), 60):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
364 txt_str += ("%s\n" % (text[1][i:i + 60]))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
365 txt_str += ("%s\n" % (text[2][i:i + 60]))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
366 txt_str += ("%s\n\n" % (text[3][i:i + 60]))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
367 txt_str += ("\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
368
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
369 return (tab_str, table_str, txt_str, ref_str, hit_str)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
370
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
371 @staticmethod
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
372 def text_table(title, headers, rows, table_format='psql'):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
373 ''' Create text table
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
374
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
375 USAGE:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
376 >>> from tabulate import tabulate
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
377 >>> title = 'My Title'
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
378 >>> headers = ['A','B']
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
379 >>> rows = [[1,2],[3,4]]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
380 >>> print(text_table(title, headers, rows))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
381 +-----------+
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
382 | My Title |
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
383 +-----+-----+
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
384 | A | B |
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
385 +=====+=====+
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
386 | 1 | 2 |
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
387 | 3 | 4 |
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
388 +-----+-----+
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
389 '''
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
390 # Create table
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
391 table = tabulate(rows, headers, tablefmt=table_format)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
392 # Prepare title injection
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
393 width = len(table.split('\n')[0])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
394 tlen = len(title)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
395 if tlen + 4 > width:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
396 # Truncate oversized titles
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
397 tlen = width - 4
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
398 title = title[:tlen]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
399 spaces = width - 2 - tlen
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
400 left_spacer = ' ' * int(spaces / 2)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
401 right_spacer = ' ' * (spaces - len(left_spacer))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
402 # Update table with title
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
403 table = '\n'.join(['+%s+' % ('-' * (width - 2)),
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
404 '|%s%s%s|' % (left_spacer,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
405 title, right_spacer),
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
406 table, '\n'])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
407 return table
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
408
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
409 def load_notes(self, notes):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
410 with open(notes, 'r') as f:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
411 for line in f:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
412 line = line.strip()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
413 if line.startswith("#"):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
414 continue
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
415 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
416 tmp = line.split(":")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
417
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
418 self.phenos[tmp[0]] = "%s %s" % (tmp[1], tmp[2])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
419
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
420 if(tmp[2].startswith("Alternate name; ")):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
421 self.phenos[tmp[2][16:]] = "%s %s" % (tmp[1], tmp[2])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
422
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
423 def load_databases(self, databases):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
424 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
425 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
426 # Check if databases and config file are correct/correponds
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
427 if databases == '':
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
428 sys.exit("Input Error: No database was specified!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
429 elif databases is None:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
430 # Choose all available databases from the config file
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
431 self.databases = self.configured_dbs.keys()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
432 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
433 # Handle multiple databases
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
434 databases = databases.split(',')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
435 # Check that the ResFinder DBs are valid
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
436 for db_prefix in databases:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
437 if db_prefix in self.configured_dbs:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
438 self.databases.append(db_prefix)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
439 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
440 sys.exit("Input Error: Provided database was not "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
441 "recognised! (%s)\n" % db_prefix)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
442
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
443 def load_db_config(self, db_conf_file):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
444 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
445 """
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
446 extensions = []
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
447 with open(db_conf_file) as f:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
448 for line in f:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
449 line = line.strip()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
450
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
451 if not line:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
452 continue
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
453
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
454 if line[0] == '#':
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
455 if 'extensions:' in line:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
456 extensions = [s.strip()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
457 for s in line.split('extensions:')[-1]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
458 .split(',')]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
459 continue
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
460
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
461 tmp = line.split('\t')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
462 if len(tmp) != 3:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
463 sys.exit(("Input Error: Invalid line in the database"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
464 " config file!\nA proper entry requires 3 tab "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
465 "separated columns!\n%s") % (line))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
466
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
467 db_prefix = tmp[0].strip()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
468 name = tmp[1].split('#')[0].strip()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
469 description = tmp[2]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
470
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
471 # Check if all db files are present
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
472 for ext in extensions:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
473 db = "%s/%s.%s" % (self.db_path, db_prefix, ext)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
474 if not os.path.exists(db):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
475 sys.exit(("Input Error: The database file (%s) "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
476 "could not be found!") % (db))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
477
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
478 if db_prefix not in self.configured_dbs:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
479 self.configured_dbs[db_prefix] = []
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
480 self.configured_dbs[db_prefix].append(name)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
481
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
482 if len(self.configured_dbs) == 0:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
483 sys.exit("Input Error: No databases were found in the "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
484 "database config file!")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
485
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
486 # Loading paths for KMA databases.
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
487 for drug in self.configured_dbs:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
488 kma_db = self.db_path_kma + drug
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
489 self.kma_db_files = [kma_db + ".b", kma_db + ".length.b",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
490 kma_db + ".name.b", kma_db + ".align.b"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
491
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
492
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
493 if __name__ == '__main__':
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
494
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
495 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
496 # PARSE COMMAND LINE OPTIONS
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
497 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
498
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
499 parser = ArgumentParser()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
500 parser.add_argument("-i", "--inputfile",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
501 dest="inputfile",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
502 help="Input file",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
503 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
504 parser.add_argument("-1", "--fastq1",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
505 help="Raw read data file 1.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
506 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
507 parser.add_argument("-2", "--fastq2",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
508 help="Raw read data file 2 (only required if \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
509 data is paired-end).",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
510 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
511 parser.add_argument("-o", "--outputPath",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
512 dest="out_path",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
513 help="Path to blast output",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
514 default='')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
515
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
516 parser.add_argument("-b", "--blastPath",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
517 dest="blast_path",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
518 help="Path to blast",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
519 default='blastn')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
520 parser.add_argument("-p", "--databasePath",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
521 dest="db_path",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
522 help="Path to the databases",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
523 default='')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
524
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
525 parser.add_argument("-k", "--kmaPath",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
526 dest="kma_path",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
527 help="Path to KMA",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
528 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
529 parser.add_argument("-q", "--databasePathKMA",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
530 dest="db_path_kma",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
531 help="Path to the directories containing the \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
532 KMA indexed databases. Defaults to the \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
533 directory 'kma_indexing' inside the \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
534 databasePath directory.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
535 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
536
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
537 parser.add_argument("-d", "--databases",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
538 dest="databases",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
539 help="Databases chosen to search in - if none \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
540 are specified all are used",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
541 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
542 parser.add_argument("-l", "--min_cov",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
543 dest="min_cov",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
544 help="Minimum coverage",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
545 default=0.60)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
546 parser.add_argument("-t", "--threshold",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
547 dest="threshold",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
548 help="Blast threshold for identity",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
549 default=0.90)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
550 parser.add_argument("-nano", "--nanopore",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
551 action="store_true",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
552 dest="nanopore",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
553 help="If nanopore data is used",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
554 default=False)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
555 args = parser.parse_args()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
556
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
557 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
558 # MAIN
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
559 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
560
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
561 # Defining varibales
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
562
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
563 min_cov = args.min_cov
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
564 threshold = args.threshold
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
565
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
566 # Check if valid database is provided
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
567 if args.db_path is None:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
568 sys.exit("Input Error: No database directory was provided!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
569 elif not os.path.exists(args.db_path):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
570 sys.exit("Input Error: The specified database directory does not "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
571 "exist!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
572 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
573 # Check existence of config file
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
574 db_config_file = '%s/config' % (args.db_path)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
575 if not os.path.exists(db_config_file):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
576 sys.exit("Input Error: The database config file could not be "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
577 "found!")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
578 # Save path
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
579 db_path = args.db_path
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
580
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
581 # Check existence of notes file
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
582 notes_path = "%s/notes.txt" % (args.db_path)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
583 if not os.path.exists(notes_path):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
584 sys.exit('Input Error: notes.txt not found! (%s)' % (notes_path))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
585
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
586 # Check for input
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
587 if args.inputfile is None and args.fastq1 is None:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
588 sys.exit("Input Error: No Input were provided!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
589
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
590 # Check if valid input file for assembly is provided
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
591 if args.inputfile:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
592 if not os.path.exists(args.inputfile):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
593 sys.exit("Input Error: Input file does not exist!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
594 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
595 inputfile = args.inputfile
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
596 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
597 inputfile = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
598
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
599 # Check if valid input files for raw data
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
600 if args.fastq1:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
601
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
602 if not os.path.exists(args.fastq1):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
603 sys.exit("Input Error: fastq1 file does not exist!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
604 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
605 input_fastq1 = args.fastq1
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
606
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
607 if args.fastq2:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
608 if not os.path.exists(args.fastq2):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
609 sys.exit("Input Error: fastq2 file does not exist!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
610 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
611 input_fastq2 = args.fastq2
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
612 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
613 input_fastq2 = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
614 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
615 input_fastq1 = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
616 input_fastq2 = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
617
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
618 # Check if valid output directory is provided
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
619 if not os.path.exists(args.out_path):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
620 sys.exit("Input Error: Output dirctory does not exists!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
621 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
622 out_path = args.out_path
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
623
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
624 # If input is an assembly, then use BLAST
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
625 if(inputfile is not None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
626 finder = ResFinder(db_conf_file=db_config_file,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
627 databases=args.databases,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
628 db_path=db_path, notes=notes_path)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
629
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
630 blast_run = finder.blast(inputfile=inputfile, out_path=out_path,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
631 min_cov=min_cov, threshold=threshold,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
632 blast=args.blast_path)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
633
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
634 finder.write_results(out_path=out_path, result=blast_run,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
635 res_type=ResFinder.TYPE_BLAST)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
636
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
637 # If the input is raw read data, then use KMA
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
638 elif(input_fastq1 is not None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
639 finder = ResFinder(db_conf_file=db_config_file,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
640 databases=args.databases,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
641 db_path=db_path, db_path_kma=args.db_path_kma,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
642 notes=notes_path)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
643
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
644 # if input_fastq2 is None, it is ignored by the kma method.
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
645 if args.nanopore:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
646 kma_run = finder.kma(inputfile_1=input_fastq1,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
647 inputfile_2=input_fastq2, kma_add_args='-ont -md 5')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
648 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
649 kma_run = finder.kma(inputfile_1=input_fastq1,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
650 inputfile_2=input_fastq2)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
651
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
652 finder.write_results(out_path=out_path, result=kma_run,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
653 res_type=ResFinder.TYPE_KMA)