annotate lrn_risk.py @ 3:5a27ac020c9e draft

Uploaded
author greg
date Tue, 27 Jun 2023 14:02:05 +0000
parents 8dc6d4aa17ec
children 8fe04662585c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env python
99e04eba4033 Uploaded
greg
parents:
diff changeset
2
99e04eba4033 Uploaded
greg
parents:
diff changeset
3 import argparse
99e04eba4033 Uploaded
greg
parents:
diff changeset
4
99e04eba4033 Uploaded
greg
parents:
diff changeset
5
99e04eba4033 Uploaded
greg
parents:
diff changeset
6 BLACKLIST_HEADER = ['Blacklisted Gene', 'Reason', 'Risk Category']
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
7 VFDB_HEADER = ['Gene', 'Contig', '%ID', '%COV', 'E', 'Annotation', 'Distribution']
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
8
99e04eba4033 Uploaded
greg
parents:
diff changeset
9
99e04eba4033 Uploaded
greg
parents:
diff changeset
10 def get_species_from_gtdb(f):
99e04eba4033 Uploaded
greg
parents:
diff changeset
11 # get GTDB species
99e04eba4033 Uploaded
greg
parents:
diff changeset
12 # assumes there is one genome in the GTDB-Tk output file
99e04eba4033 Uploaded
greg
parents:
diff changeset
13 with open(f, 'r') as fh:
1
f98c92618a6c Uploaded
greg
parents: 0
diff changeset
14 for i, line in enumerate(fh):
f98c92618a6c Uploaded
greg
parents: 0
diff changeset
15 if i == 0:
f98c92618a6c Uploaded
greg
parents: 0
diff changeset
16 # Skip header.
f98c92618a6c Uploaded
greg
parents: 0
diff changeset
17 continue
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
18 try:
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
19 items = line.split('\t')
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
20 tax = items[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
21 tax = tax.split(';')[-1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
22 # split on GTDB species tag
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
23 tax = tax.split('s__')[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
24 except Exception:
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
25 return '(Unknown Species)'
1
f98c92618a6c Uploaded
greg
parents: 0
diff changeset
26 if len(tax) == 0:
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
27 return '(Unknown Species)'
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
28 return tax
99e04eba4033 Uploaded
greg
parents:
diff changeset
29
99e04eba4033 Uploaded
greg
parents:
diff changeset
30
99e04eba4033 Uploaded
greg
parents:
diff changeset
31 def get_blast_genes(f):
99e04eba4033 Uploaded
greg
parents:
diff changeset
32 # reads genes detected via BLAST
99e04eba4033 Uploaded
greg
parents:
diff changeset
33 # BLAST header is as follows:
99e04eba4033 Uploaded
greg
parents:
diff changeset
34 # qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore nident qlen
99e04eba4033 Uploaded
greg
parents:
diff changeset
35 d = {}
99e04eba4033 Uploaded
greg
parents:
diff changeset
36 with open(f, 'r') as fh:
99e04eba4033 Uploaded
greg
parents:
diff changeset
37 for line in fh:
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
38 try:
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
39 line = line.strip()
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
40 items = line.split('\t')
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
41 gene = items[0]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
42 # contig = items[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
43 # pid = items[2]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
44 alen = items[3]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
45 # e = items[-4]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
46 qlen = items[-1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
47 # calculate query coverage by dividing alignment length by query length
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
48 qcov = round(float(alen) / float(qlen) * 100.0, 2)
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
49 if gene not in d.keys():
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
50 d[gene] = []
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
51 d[gene].append('%s\t%s' % (line, str(qcov)))
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
52 except Exception:
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
53 return d
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
54 return d
99e04eba4033 Uploaded
greg
parents:
diff changeset
55
99e04eba4033 Uploaded
greg
parents:
diff changeset
56
99e04eba4033 Uploaded
greg
parents:
diff changeset
57 def get_blacklist(v, b):
99e04eba4033 Uploaded
greg
parents:
diff changeset
58 # identify high-risk isolates based on blacklisted genes
99e04eba4033 Uploaded
greg
parents:
diff changeset
59 # blacklisted genes file contains two columns:
99e04eba4033 Uploaded
greg
parents:
diff changeset
60 # column 0=the gene name as it appears in the gene database
99e04eba4033 Uploaded
greg
parents:
diff changeset
61 # column 1=the reason why the gene was blacklisted, which will be reported
99e04eba4033 Uploaded
greg
parents:
diff changeset
62 # e.g., 'ANTHRAX TOXIN'
99e04eba4033 Uploaded
greg
parents:
diff changeset
63 bdict = {}
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
64 blacklist_present = {}
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
65 with open(b, 'r') as fh:
99e04eba4033 Uploaded
greg
parents:
diff changeset
66 for line in fh:
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
67 try:
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
68 line = line.strip()
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
69 items = line.split('\t')
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
70 gene = items[0]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
71 val = items[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
72 bdict[gene] = val
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
73 except Exception:
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
74 return blacklist_present
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
75 for key in v.keys():
99e04eba4033 Uploaded
greg
parents:
diff changeset
76 if key in bdict.keys():
99e04eba4033 Uploaded
greg
parents:
diff changeset
77 val = bdict[key]
99e04eba4033 Uploaded
greg
parents:
diff changeset
78 blacklist_present[key] = val
99e04eba4033 Uploaded
greg
parents:
diff changeset
79 return blacklist_present
99e04eba4033 Uploaded
greg
parents:
diff changeset
80
99e04eba4033 Uploaded
greg
parents:
diff changeset
81
99e04eba4033 Uploaded
greg
parents:
diff changeset
82 def gene_dist(f, blast, gtdb):
99e04eba4033 Uploaded
greg
parents:
diff changeset
83 # get within-species prevalence of genes
99e04eba4033 Uploaded
greg
parents:
diff changeset
84 # for virulence factors (VFs): uses VFDB VFs detected via ABRicate's VFDB db
99e04eba4033 Uploaded
greg
parents:
diff changeset
85 # for AMR genes: uses AMR genes detected via ABRicate's ResFinder db
99e04eba4033 Uploaded
greg
parents:
diff changeset
86 # for VFs and AMR genes: genes were detected via ABRicate XXX
99e04eba4033 Uploaded
greg
parents:
diff changeset
87 # minimum nucleotide identity and coverage values >=80%
99e04eba4033 Uploaded
greg
parents:
diff changeset
88 # total of 61,161 genomes queried
99e04eba4033 Uploaded
greg
parents:
diff changeset
89 # takes VFDB or AMR gene distribution file as input (f)
99e04eba4033 Uploaded
greg
parents:
diff changeset
90 # BLAST file of VFDB or AMR genes (blast)
99e04eba4033 Uploaded
greg
parents:
diff changeset
91 # GTDB species (gtdb)
99e04eba4033 Uploaded
greg
parents:
diff changeset
92 # create dictionaries based on gene distribution
99e04eba4033 Uploaded
greg
parents:
diff changeset
93 d = {}
99e04eba4033 Uploaded
greg
parents:
diff changeset
94 annd = {}
99e04eba4033 Uploaded
greg
parents:
diff changeset
95 gtdbd = {}
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
96 finallines = []
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
97 warnings = []
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
98 with open(f, 'r') as fh:
99e04eba4033 Uploaded
greg
parents:
diff changeset
99 for line in fh:
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
100 try:
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
101 line = line.strip()
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
102 items = line.split('\t')
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
103 tax = items[0]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
104 tax = tax.split('s__')[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
105 if len(tax) == 0:
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
106 tax = '(Unknown Species)'
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
107 gene = items[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
108 ann = items[-1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
109 denom = items[3]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
110 d['%s___%s' % (tax, gene)] = line
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
111 annd[gene] = ann
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
112 gtdbd[tax] = denom
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
113 except Exception:
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
114 return finallines
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
115 # parse BLAST results
99e04eba4033 Uploaded
greg
parents:
diff changeset
116 for key in blast.keys():
99e04eba4033 Uploaded
greg
parents:
diff changeset
117 blastval = blast[key]
99e04eba4033 Uploaded
greg
parents:
diff changeset
118 for bv in blastval:
99e04eba4033 Uploaded
greg
parents:
diff changeset
119 testkey = '%s___%s' % (gtdb, key)
99e04eba4033 Uploaded
greg
parents:
diff changeset
120 if testkey in d.keys() and gtdb != '(Unknown Species)':
99e04eba4033 Uploaded
greg
parents:
diff changeset
121 taxval = d[testkey]
99e04eba4033 Uploaded
greg
parents:
diff changeset
122 items = taxval.split('\t')
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
123 tax = items[0]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
124 tax = tax.split('s__')[1]
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
125 if len(tax) == 0:
99e04eba4033 Uploaded
greg
parents:
diff changeset
126 tax = '(Unknown Species)'
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
127 gene = items[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
128 pres = items[2]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
129 denom = items[3]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
130 perc = items[4]
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
131 perc = str(round(float(perc), 2))
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
132 ann = items[-1]
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
133 freetext = '{0}/{1} ({2}%)'.format(pres, denom, perc)
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
134 elif gtdb != '(Unknown Species)':
99e04eba4033 Uploaded
greg
parents:
diff changeset
135 ann = annd[key]
99e04eba4033 Uploaded
greg
parents:
diff changeset
136 denom = gtdbd[gtdb]
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
137 freetext = "*WARNING"
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
138 warnings.append("*WARNING: This gene has never been detected in this species! Interpret with caution!")
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
139 else:
99e04eba4033 Uploaded
greg
parents:
diff changeset
140 ann = annd[key]
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
141 freetext = "**WARNING"
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
142 warnings.append("**WARNING: This genome belongs to an undescribed species. Interpret with caution!")
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
143 finallines.append('%s\t%s\t%s' % (bv, ann, freetext))
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
144 return [finallines, warnings]
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
145
99e04eba4033 Uploaded
greg
parents:
diff changeset
146
99e04eba4033 Uploaded
greg
parents:
diff changeset
147 def output_blacklist(blacklist, blacklist_output_file):
99e04eba4033 Uploaded
greg
parents:
diff changeset
148 # takes detected blacklisted genes as input (blacklist)
99e04eba4033 Uploaded
greg
parents:
diff changeset
149 # blacklist results
99e04eba4033 Uploaded
greg
parents:
diff changeset
150 with open(blacklist_output_file, 'w') as fh:
99e04eba4033 Uploaded
greg
parents:
diff changeset
151 fh.write('%s\n' % '\t'.join(BLACKLIST_HEADER))
99e04eba4033 Uploaded
greg
parents:
diff changeset
152 if len(blacklist.keys()) == 0:
99e04eba4033 Uploaded
greg
parents:
diff changeset
153 # print this if no blacklisted genes are detected
99e04eba4033 Uploaded
greg
parents:
diff changeset
154 fh.write('(No blacklisted genes detected)\tNA\tNot high risk\n')
99e04eba4033 Uploaded
greg
parents:
diff changeset
155 else:
99e04eba4033 Uploaded
greg
parents:
diff changeset
156 # print this if blacklisted genes are detected
99e04eba4033 Uploaded
greg
parents:
diff changeset
157 # print a table with one row per detected blacklisted gene
99e04eba4033 Uploaded
greg
parents:
diff changeset
158 for key in blacklist.keys():
99e04eba4033 Uploaded
greg
parents:
diff changeset
159 val = blacklist[key]
99e04eba4033 Uploaded
greg
parents:
diff changeset
160 fh.write('%s\t%s\tHIGH RISK\n' % (key, val))
99e04eba4033 Uploaded
greg
parents:
diff changeset
161
99e04eba4033 Uploaded
greg
parents:
diff changeset
162
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
163 def output_vfdb(vfdist, vfdb_output_file, vf_warnings):
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
164 # takes distribution of virulence factors as input (vfdist)
99e04eba4033 Uploaded
greg
parents:
diff changeset
165 # VFDB results
99e04eba4033 Uploaded
greg
parents:
diff changeset
166 with open(vfdb_output_file, 'w') as fh:
99e04eba4033 Uploaded
greg
parents:
diff changeset
167 fh.write('%s\n' % '\t'.join(VFDB_HEADER))
99e04eba4033 Uploaded
greg
parents:
diff changeset
168 if len(vfdist) == 0:
99e04eba4033 Uploaded
greg
parents:
diff changeset
169 # print this if no VFs detected
99e04eba4033 Uploaded
greg
parents:
diff changeset
170 fh.write('%s\n' % '\t'.join(['(No VFs Detected)'] * 7))
99e04eba4033 Uploaded
greg
parents:
diff changeset
171 else:
99e04eba4033 Uploaded
greg
parents:
diff changeset
172 # print table of VFs if VFs detected
99e04eba4033 Uploaded
greg
parents:
diff changeset
173 for vline in vfdist:
99e04eba4033 Uploaded
greg
parents:
diff changeset
174 # blast_header=['Gene', 'Contig', 'Percent (%) Nucleotide Identity', 'Alignment Length', 'Mismatches', 'Gaps', 'Query Start', 'Query End', 'Subject Start', 'Subject End', 'E-Value', 'Bit Score', 'Identical Matches', 'Query Length']
99e04eba4033 Uploaded
greg
parents:
diff changeset
175 # lc_header=['Query Coverage', 'Annotation', 'Comparison to Publicly Available Genomes']
99e04eba4033 Uploaded
greg
parents:
diff changeset
176 items = vline.split('\t')
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
177 vgene = items[0]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
178 vcontig = items[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
179 vid = items[2]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
180 vcov = items[-3]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
181 veval = items[-7]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
182 vann = items[-2]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
183 vnotes = items[-1]
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
184 vfinal = [vgene, vcontig, vid, vcov, veval, vann, vnotes]
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
185 fh.write('%s\n' % '\t'.join(vfinal))
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
186 for vfw in sorted(vf_warnings, key=lambda x: x.count('*')):
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
187 fh.write('%s\n' % vfw)
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
188
99e04eba4033 Uploaded
greg
parents:
diff changeset
189
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
190 def output_amr(amrdist, amr_output_file, amr_warnings):
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
191 # takes distribution of AMR genes as input (amrdist)
99e04eba4033 Uploaded
greg
parents:
diff changeset
192 # AMR results
99e04eba4033 Uploaded
greg
parents:
diff changeset
193 with open(amr_output_file, 'w') as fh:
99e04eba4033 Uploaded
greg
parents:
diff changeset
194 fh.write('%s\n' % '\t'.join(VFDB_HEADER))
99e04eba4033 Uploaded
greg
parents:
diff changeset
195 if len(amrdist) == 0:
99e04eba4033 Uploaded
greg
parents:
diff changeset
196 # print this if no AMR genes detected
99e04eba4033 Uploaded
greg
parents:
diff changeset
197 fh.write('%s\n' % '\t'.join(['(No AMR Genes Detected)'] * 7))
99e04eba4033 Uploaded
greg
parents:
diff changeset
198 else:
99e04eba4033 Uploaded
greg
parents:
diff changeset
199 # print this if AMR genes detected
99e04eba4033 Uploaded
greg
parents:
diff changeset
200 for aline in amrdist:
99e04eba4033 Uploaded
greg
parents:
diff changeset
201 # blast_header=['Gene', 'Contig', 'Percent (%) Nucleotide Identity', 'Alignment Length', 'Mismatches', 'Gaps', 'Query Start', 'Query End', 'Subject Start', 'Subject End', 'E-Value', 'Bit Score', 'Identical Matches', 'Query Length']
99e04eba4033 Uploaded
greg
parents:
diff changeset
202 # lc_header=['Query Coverage', 'Annotation', 'Comparison to Publicly Available Genomes']
99e04eba4033 Uploaded
greg
parents:
diff changeset
203 items = aline.split('\t')
2
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
204 agene = items[0]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
205 acontig = items[1]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
206 aid = items[2]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
207 acov = items[-3]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
208 aeval = items[-7]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
209 aann = items[-2]
8dc6d4aa17ec Uploaded
greg
parents: 1
diff changeset
210 anotes = items[-1]
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
211 afinal = [agene, acontig, aid, acov, aeval, aann, anotes]
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
212 fh.write('%s\n' % '\t'.join(afinal))
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
213 for amrw in sorted(amr_warnings, key=lambda x: x.count('*')):
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
214 fh.write('%s\n' % amrw)
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
215
99e04eba4033 Uploaded
greg
parents:
diff changeset
216
99e04eba4033 Uploaded
greg
parents:
diff changeset
217 # lrnrisk_prototype arguments
99e04eba4033 Uploaded
greg
parents:
diff changeset
218 parser = argparse.ArgumentParser()
99e04eba4033 Uploaded
greg
parents:
diff changeset
219
99e04eba4033 Uploaded
greg
parents:
diff changeset
220 parser.add_argument('--gtdb_file', action='store', dest='gtdb_file', help='Path to gtdbtk tsv file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
221 parser.add_argument('--virulence_factors_file', action='store', dest='virulence_factors_file', help='Path to tsv virulence factors file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
222 parser.add_argument('--amr_determinants_file', action='store', dest='amr_determinants_file', help='Path to AMR determinants tsv file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
223 parser.add_argument('--blacklist_file', action='store', dest='blacklist_file', help='Path to blacklisted high-risk virulence factors tsv file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
224 parser.add_argument('--vf_distribution_file', action='store', dest='vf_distribution_file', help='Path to virulence factor distribution tsv file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
225 parser.add_argument('--amr_distribution_file', action='store', dest='amr_distribution_file', help='Path to AMR determinant distribution tsv file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
226 parser.add_argument('--blacklist_output_file', action='store', dest='blacklist_output_file', help='Path to blacklist output file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
227 parser.add_argument('--vfdb_output_file', action='store', dest='vfdb_output_file', help='Path to vfdb output file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
228 parser.add_argument('--amr_output_file', action='store', dest='amr_output_file', help='Path to amr output file')
99e04eba4033 Uploaded
greg
parents:
diff changeset
229
99e04eba4033 Uploaded
greg
parents:
diff changeset
230 # parse arguments and run pipeline
99e04eba4033 Uploaded
greg
parents:
diff changeset
231 args = parser.parse_args()
99e04eba4033 Uploaded
greg
parents:
diff changeset
232
99e04eba4033 Uploaded
greg
parents:
diff changeset
233 # print_output(blacklist, vf_distribution, amr_distribution, args.output, species)
99e04eba4033 Uploaded
greg
parents:
diff changeset
234 virulence_genes = get_blast_genes(args.virulence_factors_file)
99e04eba4033 Uploaded
greg
parents:
diff changeset
235 species = get_species_from_gtdb(args.gtdb_file)
99e04eba4033 Uploaded
greg
parents:
diff changeset
236
99e04eba4033 Uploaded
greg
parents:
diff changeset
237 blacklist = get_blacklist(virulence_genes, args.blacklist_file)
99e04eba4033 Uploaded
greg
parents:
diff changeset
238 output_blacklist(blacklist, args.blacklist_output_file)
99e04eba4033 Uploaded
greg
parents:
diff changeset
239
99e04eba4033 Uploaded
greg
parents:
diff changeset
240 vf_distribution = gene_dist(args.vf_distribution_file, virulence_genes, species)
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
241 vf_warnings = vf_distribution[1]
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
242 vf_distribution = vf_distribution[0]
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
243 output_vfdb(vf_distribution, args.vfdb_output_file, vf_warnings)
0
99e04eba4033 Uploaded
greg
parents:
diff changeset
244
99e04eba4033 Uploaded
greg
parents:
diff changeset
245 amr_genes = get_blast_genes(args.amr_determinants_file)
99e04eba4033 Uploaded
greg
parents:
diff changeset
246 amr_distribution = gene_dist(args.amr_distribution_file, amr_genes, species)
3
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
247 amr_warnings = amr_distribution[1]
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
248 amr_distribution = amr_distribution[0]
5a27ac020c9e Uploaded
greg
parents: 2
diff changeset
249 output_amr(amr_distribution, args.amr_output_file, amr_warnings)