Mercurial > repos > greg > lrn_risk
comparison lrn_risk.py @ 6:a3799444f281 draft default tip
Uploaded
| author | greg |
|---|---|
| date | Wed, 30 Aug 2023 00:04:10 +0000 |
| parents | 5aa8560d9d91 |
| children |
comparison
equal
deleted
inserted
replaced
| 5:5aa8560d9d91 | 6:a3799444f281 |
|---|---|
| 91 # GTDB species (gtdb) | 91 # GTDB species (gtdb) |
| 92 # create dictionaries based on gene distribution | 92 # create dictionaries based on gene distribution |
| 93 d = {} | 93 d = {} |
| 94 annd = {} | 94 annd = {} |
| 95 gtdbd = {} | 95 gtdbd = {} |
| 96 finallines = [] | 96 finallines = [] |
| 97 warnings = [] | |
| 98 with open(f, 'r') as fh: | 97 with open(f, 'r') as fh: |
| 99 for line in fh: | 98 for line in fh: |
| 100 try: | 99 try: |
| 101 line = line.strip() | 100 line = line.strip() |
| 102 items = line.split('\t') | 101 items = line.split('\t') |
| 135 ann = 'NA' | 134 ann = 'NA' |
| 136 if gtdb in gtdbd.keys(): | 135 if gtdb in gtdbd.keys(): |
| 137 denom = gtdbd[gtdb] | 136 denom = gtdbd[gtdb] |
| 138 else: | 137 else: |
| 139 denom = 'NA' | 138 denom = 'NA' |
| 140 freetext = "*WARNING" | 139 freetext = "NA" |
| 141 warnings.append("*WARNING: This gene has never been detected in this species and/or this species has not been included in the LRNRisk database! Interpret with caution!") | |
| 142 else: | 140 else: |
| 143 ann = 'NA' | 141 ann = 'NA' |
| 144 denom = 'NA' | 142 denom = 'NA' |
| 145 freetext = "**WARNING" | 143 freetext = "NA" |
| 146 warnings.append("**WARNING: This genome belongs to an undescribed species. Interpret with caution!") | |
| 147 finallines.append('%s\t%s\t%s' % (bv, ann, freetext)) | 144 finallines.append('%s\t%s\t%s' % (bv, ann, freetext)) |
| 148 return [finallines, warnings] | 145 return [finallines] |
| 149 | 146 |
| 150 | 147 |
| 151 def output_blacklist(blacklist, blacklist_output_file): | 148 def output_blacklist(blacklist, blacklist_output_file): |
| 152 # takes detected blacklisted genes as input (blacklist) | 149 # takes detected blacklisted genes as input (blacklist) |
| 153 # blacklist results | 150 # blacklist results |
| 162 for key in blacklist.keys(): | 159 for key in blacklist.keys(): |
| 163 val = blacklist[key] | 160 val = blacklist[key] |
| 164 fh.write('%s\t%s\tHIGH RISK\n' % (key, val)) | 161 fh.write('%s\t%s\tHIGH RISK\n' % (key, val)) |
| 165 | 162 |
| 166 | 163 |
| 167 def output_vfdb(vfdist, vfdb_output_file, vf_warnings): | 164 def output_vfdb(vfdist, vfdb_output_file): |
| 168 # takes distribution of virulence factors as input (vfdist) | 165 # takes distribution of virulence factors as input (vfdist) |
| 169 # VFDB results | 166 # VFDB results |
| 170 with open(vfdb_output_file, 'w') as fh: | 167 with open(vfdb_output_file, 'w') as fh: |
| 171 fh.write('%s\n' % '\t'.join(VFDB_HEADER)) | 168 fh.write('%s\n' % '\t'.join(VFDB_HEADER)) |
| 172 if len(vfdist) == 0: | 169 if len(vfdist) == 0: |
| 185 veval = items[-7] | 182 veval = items[-7] |
| 186 vann = items[-2] | 183 vann = items[-2] |
| 187 vnotes = items[-1] | 184 vnotes = items[-1] |
| 188 vfinal = [vgene, vcontig, vid, vcov, veval, vann, vnotes] | 185 vfinal = [vgene, vcontig, vid, vcov, veval, vann, vnotes] |
| 189 fh.write('%s\n' % '\t'.join(vfinal)) | 186 fh.write('%s\n' % '\t'.join(vfinal)) |
| 190 for vfw in sorted(vf_warnings, key=lambda x: x.count('*')): | 187 |
| 191 fh.write('%s\n' % vfw) | 188 def output_amr(amrdist, amr_output_file): |
| 192 | |
| 193 | |
| 194 def output_amr(amrdist, amr_output_file, amr_warnings): | |
| 195 # takes distribution of AMR genes as input (amrdist) | 189 # takes distribution of AMR genes as input (amrdist) |
| 196 # AMR results | 190 # AMR results |
| 197 with open(amr_output_file, 'w') as fh: | 191 with open(amr_output_file, 'w') as fh: |
| 198 fh.write('%s\n' % '\t'.join(VFDB_HEADER)) | 192 fh.write('%s\n' % '\t'.join(VFDB_HEADER)) |
| 199 if len(amrdist) == 0: | 193 if len(amrdist) == 0: |
| 212 aeval = items[-7] | 206 aeval = items[-7] |
| 213 aann = items[-2] | 207 aann = items[-2] |
| 214 anotes = items[-1] | 208 anotes = items[-1] |
| 215 afinal = [agene, acontig, aid, acov, aeval, aann, anotes] | 209 afinal = [agene, acontig, aid, acov, aeval, aann, anotes] |
| 216 fh.write('%s\n' % '\t'.join(afinal)) | 210 fh.write('%s\n' % '\t'.join(afinal)) |
| 217 for amrw in sorted(amr_warnings, key=lambda x: x.count('*')): | |
| 218 fh.write('%s\n' % amrw) | |
| 219 | |
| 220 | 211 |
| 221 # lrnrisk_prototype arguments | 212 # lrnrisk_prototype arguments |
| 222 parser = argparse.ArgumentParser() | 213 parser = argparse.ArgumentParser() |
| 223 | 214 |
| 224 parser.add_argument('--gtdb_file', action='store', dest='gtdb_file', help='Path to gtdbtk tsv file') | 215 parser.add_argument('--gtdb_file', action='store', dest='gtdb_file', help='Path to gtdbtk tsv file') |
| 240 | 231 |
| 241 blacklist = get_blacklist(virulence_genes, args.blacklist_file) | 232 blacklist = get_blacklist(virulence_genes, args.blacklist_file) |
| 242 output_blacklist(blacklist, args.blacklist_output_file) | 233 output_blacklist(blacklist, args.blacklist_output_file) |
| 243 | 234 |
| 244 vf_distribution = gene_dist(args.vf_distribution_file, virulence_genes, species) | 235 vf_distribution = gene_dist(args.vf_distribution_file, virulence_genes, species) |
| 245 vf_warnings = vf_distribution[1] | |
| 246 vf_distribution = vf_distribution[0] | 236 vf_distribution = vf_distribution[0] |
| 247 output_vfdb(vf_distribution, args.vfdb_output_file, vf_warnings) | 237 output_vfdb(vf_distribution, args.vfdb_output_file) |
| 248 | 238 |
| 249 amr_genes = get_blast_genes(args.amr_determinants_file) | 239 amr_genes = get_blast_genes(args.amr_determinants_file) |
| 250 amr_distribution = gene_dist(args.amr_distribution_file, amr_genes, species) | 240 amr_distribution = gene_dist(args.amr_distribution_file, amr_genes, species) |
| 251 amr_warnings = amr_distribution[1] | |
| 252 amr_distribution = amr_distribution[0] | 241 amr_distribution = amr_distribution[0] |
| 253 output_amr(amr_distribution, args.amr_output_file, amr_warnings) | 242 output_amr(amr_distribution, args.amr_output_file) |
