comparison lrn_risk.py @ 6:a3799444f281 draft default tip

Uploaded
author greg
date Wed, 30 Aug 2023 00:04:10 +0000
parents 5aa8560d9d91
children
comparison
equal deleted inserted replaced
5:5aa8560d9d91 6:a3799444f281
91 # GTDB species (gtdb) 91 # GTDB species (gtdb)
92 # create dictionaries based on gene distribution 92 # create dictionaries based on gene distribution
93 d = {} 93 d = {}
94 annd = {} 94 annd = {}
95 gtdbd = {} 95 gtdbd = {}
96 finallines = [] 96 finallines = []
97 warnings = []
98 with open(f, 'r') as fh: 97 with open(f, 'r') as fh:
99 for line in fh: 98 for line in fh:
100 try: 99 try:
101 line = line.strip() 100 line = line.strip()
102 items = line.split('\t') 101 items = line.split('\t')
135 ann = 'NA' 134 ann = 'NA'
136 if gtdb in gtdbd.keys(): 135 if gtdb in gtdbd.keys():
137 denom = gtdbd[gtdb] 136 denom = gtdbd[gtdb]
138 else: 137 else:
139 denom = 'NA' 138 denom = 'NA'
140 freetext = "*WARNING" 139 freetext = "NA"
141 warnings.append("*WARNING: This gene has never been detected in this species and/or this species has not been included in the LRNRisk database! Interpret with caution!")
142 else: 140 else:
143 ann = 'NA' 141 ann = 'NA'
144 denom = 'NA' 142 denom = 'NA'
145 freetext = "**WARNING" 143 freetext = "NA"
146 warnings.append("**WARNING: This genome belongs to an undescribed species. Interpret with caution!")
147 finallines.append('%s\t%s\t%s' % (bv, ann, freetext)) 144 finallines.append('%s\t%s\t%s' % (bv, ann, freetext))
148 return [finallines, warnings] 145 return [finallines]
149 146
150 147
151 def output_blacklist(blacklist, blacklist_output_file): 148 def output_blacklist(blacklist, blacklist_output_file):
152 # takes detected blacklisted genes as input (blacklist) 149 # takes detected blacklisted genes as input (blacklist)
153 # blacklist results 150 # blacklist results
162 for key in blacklist.keys(): 159 for key in blacklist.keys():
163 val = blacklist[key] 160 val = blacklist[key]
164 fh.write('%s\t%s\tHIGH RISK\n' % (key, val)) 161 fh.write('%s\t%s\tHIGH RISK\n' % (key, val))
165 162
166 163
167 def output_vfdb(vfdist, vfdb_output_file, vf_warnings): 164 def output_vfdb(vfdist, vfdb_output_file):
168 # takes distribution of virulence factors as input (vfdist) 165 # takes distribution of virulence factors as input (vfdist)
169 # VFDB results 166 # VFDB results
170 with open(vfdb_output_file, 'w') as fh: 167 with open(vfdb_output_file, 'w') as fh:
171 fh.write('%s\n' % '\t'.join(VFDB_HEADER)) 168 fh.write('%s\n' % '\t'.join(VFDB_HEADER))
172 if len(vfdist) == 0: 169 if len(vfdist) == 0:
185 veval = items[-7] 182 veval = items[-7]
186 vann = items[-2] 183 vann = items[-2]
187 vnotes = items[-1] 184 vnotes = items[-1]
188 vfinal = [vgene, vcontig, vid, vcov, veval, vann, vnotes] 185 vfinal = [vgene, vcontig, vid, vcov, veval, vann, vnotes]
189 fh.write('%s\n' % '\t'.join(vfinal)) 186 fh.write('%s\n' % '\t'.join(vfinal))
190 for vfw in sorted(vf_warnings, key=lambda x: x.count('*')): 187
191 fh.write('%s\n' % vfw) 188 def output_amr(amrdist, amr_output_file):
192
193
194 def output_amr(amrdist, amr_output_file, amr_warnings):
195 # takes distribution of AMR genes as input (amrdist) 189 # takes distribution of AMR genes as input (amrdist)
196 # AMR results 190 # AMR results
197 with open(amr_output_file, 'w') as fh: 191 with open(amr_output_file, 'w') as fh:
198 fh.write('%s\n' % '\t'.join(VFDB_HEADER)) 192 fh.write('%s\n' % '\t'.join(VFDB_HEADER))
199 if len(amrdist) == 0: 193 if len(amrdist) == 0:
212 aeval = items[-7] 206 aeval = items[-7]
213 aann = items[-2] 207 aann = items[-2]
214 anotes = items[-1] 208 anotes = items[-1]
215 afinal = [agene, acontig, aid, acov, aeval, aann, anotes] 209 afinal = [agene, acontig, aid, acov, aeval, aann, anotes]
216 fh.write('%s\n' % '\t'.join(afinal)) 210 fh.write('%s\n' % '\t'.join(afinal))
217 for amrw in sorted(amr_warnings, key=lambda x: x.count('*')):
218 fh.write('%s\n' % amrw)
219
220 211
221 # lrnrisk_prototype arguments 212 # lrnrisk_prototype arguments
222 parser = argparse.ArgumentParser() 213 parser = argparse.ArgumentParser()
223 214
224 parser.add_argument('--gtdb_file', action='store', dest='gtdb_file', help='Path to gtdbtk tsv file') 215 parser.add_argument('--gtdb_file', action='store', dest='gtdb_file', help='Path to gtdbtk tsv file')
240 231
241 blacklist = get_blacklist(virulence_genes, args.blacklist_file) 232 blacklist = get_blacklist(virulence_genes, args.blacklist_file)
242 output_blacklist(blacklist, args.blacklist_output_file) 233 output_blacklist(blacklist, args.blacklist_output_file)
243 234
244 vf_distribution = gene_dist(args.vf_distribution_file, virulence_genes, species) 235 vf_distribution = gene_dist(args.vf_distribution_file, virulence_genes, species)
245 vf_warnings = vf_distribution[1]
246 vf_distribution = vf_distribution[0] 236 vf_distribution = vf_distribution[0]
247 output_vfdb(vf_distribution, args.vfdb_output_file, vf_warnings) 237 output_vfdb(vf_distribution, args.vfdb_output_file)
248 238
249 amr_genes = get_blast_genes(args.amr_determinants_file) 239 amr_genes = get_blast_genes(args.amr_determinants_file)
250 amr_distribution = gene_dist(args.amr_distribution_file, amr_genes, species) 240 amr_distribution = gene_dist(args.amr_distribution_file, amr_genes, species)
251 amr_warnings = amr_distribution[1]
252 amr_distribution = amr_distribution[0] 241 amr_distribution = amr_distribution[0]
253 output_amr(amr_distribution, args.amr_output_file, amr_warnings) 242 output_amr(amr_distribution, args.amr_output_file)