annotate resfinder/run_resfinder.py @ 0:55051a9bc58d draft default tip

Uploaded
author dcouvin
date Mon, 10 Jan 2022 20:06:07 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
1 #!/usr/bin/env python3
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
2 import sys
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
3 import os
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
4 import subprocess
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
5 from argparse import ArgumentParser
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
6 import pickle
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
7
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
8 from cge.resfinder import ResFinder
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
9 from cge.pointfinder import PointFinder
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
10
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
11 # Modules used to create the extended ResFinder output (phenotype output)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
12 from cge.phenotype2genotype.isolate import Isolate
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
13 from cge.phenotype2genotype.res_profile import PhenoDB
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
14 from cge.phenotype2genotype.res_sumtable import ResSumTable
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
15 from cge.phenotype2genotype.res_sumtable import PanelNameError
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
16 from cge.out.util.generator import Generator
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
17 from cge.standardize_results import ResFinderResultHandler, DatabaseHandler
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
18 from cge.standardize_results import PointFinderResultHandler
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
19
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
20 import json
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
21 # TODO list:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
22 # TODO: Add input data check
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
23
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
24
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
25 # ########################################################################### #
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
26 # ######### FUNCTIONS ######### #
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
27 # ########################################################################### #
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
28
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
29
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
30 def eprint(*args, **kwargs):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
31 print(*args, file=sys.stderr, **kwargs)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
32
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
33 # TODO: Add fix species choice
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
34 species_transl = {"c. jejuni": "campylobacter jejuni",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
35 "c.jejuni": "campylobacter jejuni",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
36 "c jejuni": "campylobacter jejuni",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
37 "c. coli": "campylobacter coli",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
38 "c.coli": "campylobacter coli",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
39 "c coli": "campylobacter coli",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
40 "e. coli": "escherichia coli",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
41 "e.coli": "escherichia coli",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
42 "e coli": "escherichia coli",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
43 "ecoli": "escherichia coli",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
44 "s. enterica": "salmonella enterica",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
45 "s.enterica": "salmonella enterica",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
46 "s enterica": "salmonella enterica",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
47 "senterica": "salmonella enterica",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
48 }
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
49
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
50 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
51 # PARSE COMMAND LINE OPTIONS
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
52 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
53
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
54 parser = ArgumentParser(allow_abbrev=False)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
55
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
56 # General options
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
57 parser.add_argument("-ifa", "--inputfasta",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
58 help="Input fasta file.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
59 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
60 parser.add_argument("-ifq", "--inputfastq",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
61 help="Input fastq file(s). Assumed to be single-end fastq \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
62 if only one file is provided, and assumed to be \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
63 paired-end data if two files are provided.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
64 nargs="+",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
65 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
66
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
67 parser.add_argument("-o", "--outputPath",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
68 dest="out_path",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
69 help="Path to blast output",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
70 default='')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
71 parser.add_argument("-b", "--blastPath",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
72 dest="blast_path",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
73 help="Path to blastn",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
74 default='blastn')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
75 parser.add_argument("-k", "--kmaPath",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
76 dest="kma_path",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
77 help="Path to KMA",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
78 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
79 parser.add_argument("-s", "--species",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
80 help="Species in the sample",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
81 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
82
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
83 # Acquired resistance options
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
84 parser.add_argument("-db_res", "--db_path_res",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
85 help="Path to the databases for ResFinder",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
86 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
87 parser.add_argument("-db_res_kma", "--db_path_res_kma",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
88 help="Path to the ResFinder databases indexed with KMA. \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
89 Defaults to the 'kma_indexing' directory inside the \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
90 given database directory.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
91 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
92 parser.add_argument("-d", "--databases",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
93 dest="databases",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
94 help="Databases chosen to search in - if none is specified\
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
95 all is used",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
96 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
97 parser.add_argument("-acq", "--acquired",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
98 action="store_true",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
99 dest="acquired",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
100 help="Run resfinder for acquired resistance genes",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
101 default=False)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
102 parser.add_argument("-ao", "--acq_overlap",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
103 help="Genes are allowed to overlap this number of\
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
104 nucleotides. Default: 30.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
105 type=int,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
106 default=30)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
107 parser.add_argument("-l", "--min_cov",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
108 dest="min_cov",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
109 help="Minimum (breadth-of) coverage of ResFinder within the range 0-1.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
110 type=float,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
111 default=0.60)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
112 parser.add_argument("-t", "--threshold",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
113 dest="threshold",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
114 help="Threshold for identity of ResFinder within the range 0-1.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
115 type=float,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
116 default=0.80)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
117 parser.add_argument("-nano", "--nanopore",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
118 action="store_true",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
119 dest="nanopore",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
120 help="If nanopore data is used",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
121 default=False)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
122 # Point resistance option
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
123 parser.add_argument("-c", "--point",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
124 action="store_true",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
125 dest="point",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
126 help="Run pointfinder for chromosomal mutations",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
127 default=False)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
128 parser.add_argument("-db_point", "--db_path_point",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
129 help="Path to the databases for PointFinder",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
130 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
131 parser.add_argument("-db_point_kma", "--db_path_point_kma",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
132 help="Path to the PointFinder databases indexed with KMA. \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
133 Defaults to the 'kma_indexing' directory inside the \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
134 given database directory.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
135 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
136 parser.add_argument("-g",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
137 dest="specific_gene",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
138 nargs='+',
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
139 help="Specify genes existing in the database to \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
140 search for - if none is specified all genes are \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
141 included in the search.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
142 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
143 parser.add_argument("-u", "--unknown_mut",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
144 dest="unknown_mutations",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
145 action="store_true",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
146 help="Show all mutations found even if in unknown to the\
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
147 resistance database",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
148 default=False)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
149 parser.add_argument("-l_p", "--min_cov_point",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
150 dest="min_cov_point",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
151 help="Minimum (breadth-of) coverage of Pointfinder within the range 0-1. \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
152 If None is selected, the minimum coverage of \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
153 ResFinder will be used.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
154 type=float,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
155 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
156 parser.add_argument("-t_p", "--threshold_point",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
157 dest="threshold_point",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
158 help="Threshold for identity of Pointfinder within the range 0-1. \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
159 If None is selected, the minimum coverage of \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
160 ResFinder will be used.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
161 type=float,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
162 default=None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
163 # Temporary option only available temporary
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
164 parser.add_argument("--pickle",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
165 action="store_true",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
166 help="Create a pickle dump of the Isolate object. \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
167 Currently needed in the CGE webserver. Dependency \
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
168 and this option is being removed.",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
169 default=False)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
170
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
171 args = parser.parse_args()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
172
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
173 if(args.species is not None and args.species.lower() == "other"):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
174 args.species = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
175
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
176 if(args.point and not args.species):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
177 sys.exit("ERROR: Chromosomal point mutations cannot be located if no "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
178 "species has been provided. Please provide species using the "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
179 "--species option.")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
180
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
181 # Check if coverage/identity parameters are valid
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
182 if(args.min_cov > 1.0 or args.min_cov < 0.0):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
183 sys.exit("ERROR: Minimum coverage above 1 or below 0 is not allowed. Please select a minimum coverage within the range 0-1 with the flag -l.")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
184
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
185 if(args.threshold > 1.0 or args.threshold < 0.0):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
186 sys.exit("ERROR: Threshold for identity of ResFinder above 1 or below 0 is not allowed. Please select a threshold for identity within the range 0-1 with the flag -t.")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
187
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
188
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
189 # Create a "sample" name
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
190 if(args.inputfasta):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
191 args.inputfasta = os.path.abspath(args.inputfasta)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
192 if(not os.path.isfile(args.inputfasta)):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
193 sys.exit("ERROR: Input FASTA file not found: " + args.inputfasta)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
194 sample_name = os.path.basename(args.inputfasta)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
195 method = PointFinder.TYPE_BLAST
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
196 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
197 sample_name = os.path.basename(args.inputfastq[0])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
198 method = PointFinder.TYPE_KMA
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
199
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
200 if(args.inputfastq):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
201 inputfastq_1 = args.inputfastq[0]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
202 inputfastq_1 = os.path.abspath(inputfastq_1)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
203 if(not os.path.isfile(inputfastq_1)):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
204 sys.exit("ERROR: Input fastq file 1 not found: " + inputfastq_1)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
205 if(len(args.inputfastq) == 2):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
206 inputfastq_2 = args.inputfastq[1]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
207 inputfastq_2 = os.path.abspath(inputfastq_2)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
208 if(not os.path.isfile(inputfastq_2)):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
209 sys.exit("ERROR: Input fastq file 2 not found: " + inputfastq_2)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
210 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
211 inputfastq_2 = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
212
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
213 blast = args.blast_path
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
214 if(args.inputfasta):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
215 try:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
216 _ = subprocess.check_output([blast, "-h"])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
217 except FileNotFoundError as e:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
218 sys.exit("ERROR: Unable to execute blastn from the path: {}"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
219 .format(blast))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
220
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
221 # Check KMA path cge/kma/kma
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
222 if(args.inputfastq):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
223 if(args.kma_path is None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
224 kma = (os.path.dirname(os.path.realpath(__file__)) + "/cge/kma/kma")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
225 kma = os.path.abspath(kma)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
226 try:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
227 _ = subprocess.check_output([kma, "-h"])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
228 except FileNotFoundError as e:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
229 kma = "kma"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
230 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
231 kma = args.kma_path
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
232 try:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
233 _ = subprocess.check_output([kma, "-h"])
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
234 except FileNotFoundError as e:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
235 sys.exit("ERROR: Unable to execute kma from the path: {}".format(kma))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
236 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
237 kma = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
238
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
239 db_path_point = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
240
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
241 if(args.species):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
242 args.species = args.species.lower()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
243
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
244 fixed_species = species_transl.get(args.species, None)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
245 if(fixed_species):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
246 args.species = fixed_species
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
247
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
248 tmp_list = args.species.split()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
249 if(len(tmp_list) != 1 and len(tmp_list) != 2):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
250 sys.exit("ERROR: Species name must contain 1 or 2 names.")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
251
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
252 # Check Poinfinder database
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
253 if(args.point):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
254 if(len(tmp_list) == 2):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
255 point_species = "_".join(tmp_list)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
256 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
257 point_species = tmp_list[0]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
258
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
259 if(args.db_path_point is None and args.point):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
260 db_path_point = (os.path.dirname(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
261 os.path.realpath(__file__)) + "/db_pointfinder")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
262 elif(args.db_path_point is not None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
263 db_path_point = args.db_path_point
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
264
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
265 db_path_point = os.path.abspath(db_path_point)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
266 point_dbs = PointFinder.get_db_names(db_path_point)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
267
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
268 # Check if a database for species exists
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
269 if(point_species not in point_dbs and args.point):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
270 # If not db for species is found check if db for genus is found
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
271 # and use that instead
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
272 if(tmp_list[0] in point_dbs):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
273 point_species = tmp_list[0]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
274 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
275 sys.exit("ERROR: species '%s' (%s) does not seem to exist"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
276 " as a PointFinder database."
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
277 % (args.species, point_species))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
278
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
279 db_path_point_root = db_path_point
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
280 db_path_point = db_path_point + "/" + point_species
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
281
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
282 # Check output directory
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
283 args.out_path = os.path.abspath(args.out_path)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
284 os.makedirs(args.out_path, exist_ok=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
285
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
286 if args.acquired is False and args.point is False:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
287 sys.exit("Please specify to look for acquired resistance genes, "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
288 "chromosomal mutaitons or both!\n")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
289
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
290 # Check ResFinder database
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
291 if(args.db_path_res is None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
292 args.db_path_res = (os.path.dirname(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
293 os.path.realpath(__file__)) + "/db_resfinder")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
294 args.db_path_res = os.path.abspath(args.db_path_res)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
295 if(not os.path.exists(args.db_path_res)):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
296 sys.exit("Could not locate ResFinder database path: %s"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
297 % args.db_path_res)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
298
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
299 if(args.db_path_res_kma is None and args.acquired and args.inputfastq):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
300 args.db_path_res_kma = args.db_path_res
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
301 if(not os.path.exists(args.db_path_res_kma)):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
302 sys.exit("Could not locate ResFinder database index path: %s"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
303 % args.db_path_res_kma)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
304
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
305 min_cov = float(args.min_cov)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
306
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
307 # Initialise result dict
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
308 init_software_result = {"software_name": "ResFinder"}
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
309 git_path = os.path.abspath(os.path.dirname(__file__))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
310 std_result = Generator.init_software_result(name="ResFinder", gitdir=git_path)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
311
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
312 if(args.acquired):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
313 DatabaseHandler.load_database_metadata("ResFinder", std_result,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
314 args.db_path_res)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
315 if(args.point):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
316 DatabaseHandler.load_database_metadata("PointFinder", std_result,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
317 db_path_point_root)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
318 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
319 # ResFinder
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
320 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
321
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
322 if args.acquired is True:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
323
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
324 databases = args.databases
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
325 threshold = float(args.threshold)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
326
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
327 if(args.inputfasta):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
328 out_res_blast = args.out_path + "/resfinder_blast"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
329 os.makedirs(out_res_blast, exist_ok=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
330 if(args.inputfastq):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
331 out_res_kma = args.out_path + "/resfinder_kma"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
332 os.makedirs(out_res_kma, exist_ok=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
333
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
334 db_path_res = args.db_path_res
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
335
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
336 # Check if valid database is provided
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
337 if(db_path_res is None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
338 db_path_res = (os.path.dirname(os.path.realpath(__file__))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
339 + "/db_resfinder")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
340
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
341 if not os.path.exists(db_path_res):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
342 sys.exit("Input Error: The specified database directory does not "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
343 "exist!\nProvided path: " + str(db_path_res))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
344 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
345 # Check existence of config file
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
346 db_config_file = '%s/config' % (db_path_res)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
347 if not os.path.exists(db_config_file):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
348 sys.exit("Input Error: The database config file could not be "
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
349 "found!")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
350
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
351 # Check existence of notes file
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
352 notes_path = "%s/notes.txt" % (db_path_res)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
353 if not os.path.exists(notes_path):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
354 sys.exit('Input Error: notes.txt not found! (%s)' % (notes_path))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
355
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
356 blast_results = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
357 kma_results = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
358
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
359 # Actually running ResFinder (for acquired resistance)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
360 acquired_finder = ResFinder(db_conf_file=db_config_file,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
361 databases=args.databases, db_path=db_path_res,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
362 notes=notes_path,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
363 db_path_kma=args.db_path_res_kma)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
364
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
365 if(args.inputfasta):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
366 blast_results = acquired_finder.blast(inputfile=args.inputfasta,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
367 out_path=out_res_blast,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
368 min_cov=min_cov,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
369 threshold=threshold,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
370 blast=blast,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
371 allowed_overlap=args.acq_overlap)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
372
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
373 # DEPRECATED
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
374 # use std_result
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
375 new_std_res = ResFinder.old_results_to_standard_output(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
376 blast_results.results, software="ResFinder", version="4.1.0",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
377 run_date="fake_run_date", run_cmd="Fake run cmd",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
378 id=sample_name)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
379
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
380 # DEPRECATED
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
381 # use std_result
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
382 acquired_finder.write_results(out_path=args.out_path,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
383 result=blast_results,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
384 res_type=ResFinder.TYPE_BLAST)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
385
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
386 ResFinderResultHandler.standardize_results(std_result,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
387 blast_results.results,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
388 "ResFinder")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
389 #DEBUG
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
390 # print("STD RESULT:\n{}".format(json.dumps(std_result)))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
391
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
392 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
393 if args.nanopore:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
394 kma_run = acquired_finder.kma(inputfile_1=inputfastq_1,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
395 inputfile_2=inputfastq_2,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
396 out_path=out_res_kma,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
397 db_path_kma=args.db_path_res_kma,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
398 databases=acquired_finder.databases,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
399 min_cov=min_cov,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
400 threshold=args.threshold,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
401 kma_path=kma,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
402 sample_name="",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
403 kma_cge=True,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
404 kma_apm="p",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
405 kma_1t1=True,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
406 kma_add_args='-ont -md 5')
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
407 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
408 kma_run = acquired_finder.kma(inputfile_1=inputfastq_1,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
409 inputfile_2=inputfastq_2,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
410 out_path=out_res_kma,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
411 db_path_kma=args.db_path_res_kma,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
412 databases=acquired_finder.databases,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
413 min_cov=min_cov,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
414 threshold=args.threshold,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
415 kma_path=kma,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
416 sample_name="",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
417 kma_cge=True,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
418 kma_apm="p",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
419 kma_1t1=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
420
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
421 # DEPRECATED
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
422 # use std_result
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
423 new_std_res = ResFinder.old_results_to_standard_output(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
424 kma_run.results, software="ResFinder", version="4.1.0",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
425 run_date="fake_run_date", run_cmd="Fake run cmd",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
426 id=sample_name)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
427
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
428 # DEPRECATED
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
429 # use std_result
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
430 acquired_finder.write_results(out_path=args.out_path,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
431 result=kma_run.results,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
432 res_type=ResFinder.TYPE_KMA)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
433
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
434 ResFinderResultHandler.standardize_results(std_result,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
435 kma_run.results,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
436 "ResFinder")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
437 #DEBUG
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
438 # print("STD RESULT:\n{}".format(json.dumps(std_result)))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
439
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
440 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
441 # PointFinder
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
442 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
443
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
444 if args.point is True and args.species:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
445
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
446 if(args.inputfasta):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
447 out_point = os.path.abspath(args.out_path + "/pointfinder_blast")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
448 os.makedirs(out_point, exist_ok=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
449 if(args.inputfastq):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
450 out_point = os.path.abspath(args.out_path + "/pointfinder_kma")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
451 os.makedirs(out_point, exist_ok=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
452
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
453 if args.min_cov_point is None:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
454 min_cov_point = args.min_cov
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
455 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
456 min_cov_point = args.min_cov_point
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
457 if(args.min_cov_point > 1.0 or args.min_cov_point < 0.0):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
458 sys.exit("ERROR: Minimum coverage above 1 or below 0 is not allowed. Please select a minimum coverage within the range 0-1 with the flag -l_p.")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
459
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
460 if args.threshold_point is None:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
461 threshold_point = args.threshold
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
462 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
463 threshold_point = args.threshold_point
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
464 if(args.threshold_point > 1.0 or args.threshold_point < 0.0):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
465 sys.exit("ERROR: Threshold for identity of PointFinder above 1 or below 0 is not allowed. Please select a threshold for identity within the range 0-1 with the flag -t_p.")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
466
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
467 finder = PointFinder(db_path=db_path_point, species=point_species,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
468 gene_list=args.specific_gene)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
469
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
470 if(args.inputfasta):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
471 blast_run = finder.blast(inputfile=args.inputfasta,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
472 out_path=out_point,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
473 # min_cov=min_cov_point,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
474 min_cov=0.01,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
475 threshold=threshold_point,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
476 blast=blast,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
477 cut_off=False)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
478 results = blast_run.results
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
479
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
480 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
481
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
482 method = PointFinder.TYPE_KMA
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
483
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
484 kma_run = finder.kma(inputfile_1=inputfastq_1,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
485 inputfile_2=inputfastq_2,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
486 out_path=out_point,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
487 db_path_kma=db_path_point,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
488 databases=[point_species],
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
489 min_cov=0.01,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
490 # min_cov=min_cov_point,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
491 threshold=threshold_point,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
492 kma_path=kma,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
493 sample_name="",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
494 kma_cge=True,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
495 kma_apm="p",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
496 kma_1t1=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
497
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
498 results = kma_run.results
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
499
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
500 if(args.specific_gene):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
501 results = PointFinder.discard_unwanted_results(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
502 results=results, wanted=args.specific_gene)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
503
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
504 if(method == PointFinder.TYPE_BLAST):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
505 results_pnt = finder.find_best_seqs(results, min_cov)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
506 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
507 results_pnt = results[finder.species]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
508 if(results_pnt == "No hit found"):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
509 results_pnt = {}
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
510 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
511 results_pnt["excluded"] = results["excluded"]
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
512
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
513 # DEPRECATED
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
514 # use std_result
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
515 new_std_pnt = finder.old_results_to_standard_output(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
516 result=results_pnt, software="ResFinder", version="4.1.0",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
517 run_date="fake_run_date", run_cmd="Fake run cmd",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
518 id=sample_name)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
519
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
520 # DEPRECATED
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
521 # use std_result
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
522 finder.write_results(out_path=args.out_path, result=results,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
523 res_type=method, unknown_flag=args.unknown_mutations,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
524 min_cov=min_cov_point, perc_iden=threshold_point)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
525
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
526 #DEBUG
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
527 # print("POINT RES:\n{}".format(json.dumps(results_pnt)))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
528
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
529 PointFinderResultHandler.standardize_results(std_result,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
530 results_pnt,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
531 "PointFinder")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
532
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
533 #DEBUG
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
534 # print("STD RESULT:\n{}".format(json.dumps(std_result)))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
535 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
536 # Phenotype to genotype
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
537 ##########################################################################
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
538
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
539 # Load genotype to phenotype database
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
540 if(db_path_point):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
541 point_file = db_path_point + "/resistens-overview.txt"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
542 else:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
543 point_file = None
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
544
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
545 res_pheno_db = PhenoDB(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
546 abclassdef_file=(args.db_path_res + "/antibiotic_classes.txt"),
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
547 acquired_file=args.db_path_res + "/phenotypes.txt", point_file=point_file)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
548 # Isolate object store results
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
549 isolate = Isolate(name=sample_name)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
550 if(args.acquired):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
551 isolate.load_finder_results(std_table=std_result,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
552 phenodb=res_pheno_db,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
553 type="genes")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
554 # isolate.load_finder_results(std_table=std_result,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
555 # phenodb=res_pheno_db)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
556 # isolate.load_finder_results(std_table=new_std_res,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
557 # phenodb=res_pheno_db)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
558 if(args.point):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
559 isolate.load_finder_results(std_table=std_result,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
560 phenodb=res_pheno_db,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
561 type="seq_variations")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
562 # isolate.load_finder_results_old(std_table=new_std_pnt,
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
563 # phenodb=res_pheno_db)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
564 # isolate.load_pointfinder_tab(args.out_path + "/PointFinder_results.txt",
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
565 # res_pheno_db)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
566 isolate.calc_res_profile(res_pheno_db)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
567 if(args.acquired):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
568 ResFinderResultHandler.load_res_profile(std_result, isolate)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
569 if(args.point):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
570 PointFinderResultHandler.load_res_profile(std_result, isolate)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
571
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
572
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
573 #TODO
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
574 std_result_file = "{}/std_format_under_development.json".format(args.out_path)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
575 with open(std_result_file, 'w') as fh:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
576 fh.write(json.dumps(std_result))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
577
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
578 # Create and write the downloadable tab file
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
579 pheno_profile_str = isolate.profile_to_str_table(header=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
580 # TODO: REMOVE THE NEED FOR THE PICKLED FILE
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
581 if(args.pickle):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
582 isolate_pickle = open("{}/isolate.p".format(args.out_path), "wb")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
583 pickle.dump(isolate, isolate_pickle, protocol=2)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
584
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
585 pheno_table_file = args.out_path + '/pheno_table.txt'
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
586 with open(pheno_table_file, 'w') as fh:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
587 fh.write(pheno_profile_str)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
588
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
589 if(args.species is not None):
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
590 # Apply AMR panel
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
591 input_amr_panels = args.db_path_res + "/phenotype_panels.txt"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
592 res_sum_table = ResSumTable(pheno_profile_str)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
593 res_sum_table.load_amr_panels(input_amr_panels)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
594
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
595 try:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
596 panel_profile_str = res_sum_table.get_amr_panel_str(
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
597 panel_name_raw=args.species, header=True)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
598 # If specified species does not have an associated panel, just ignore it
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
599 # and exit.
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
600 except PanelNameError:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
601 eprint("Warning: No panel was detected for the species: {}"
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
602 .format(args.species))
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
603 sys.exit()
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
604
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
605 amr_panel_filename = args.species.replace(" ", "_")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
606
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
607 panel_tabel_file = (pheno_table_file[:-4] + "_" + amr_panel_filename
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
608 + ".txt")
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
609 with open(panel_tabel_file, "w") as fh:
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
610 fh.write(panel_profile_str)
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
611
55051a9bc58d Uploaded
dcouvin
parents:
diff changeset
612 sys.exit()