comparison lrn_risk.py @ 1:f98c92618a6c draft

Uploaded
author greg
date Fri, 28 Apr 2023 15:06:29 +0000
parents 99e04eba4033
children 8dc6d4aa17ec
comparison
equal deleted inserted replaced
0:99e04eba4033 1:f98c92618a6c
9 9
10 def get_species_from_gtdb(f): 10 def get_species_from_gtdb(f):
11 # get GTDB species 11 # get GTDB species
12 # assumes there is one genome in the GTDB-Tk output file 12 # assumes there is one genome in the GTDB-Tk output file
13 with open(f, 'r') as fh: 13 with open(f, 'r') as fh:
14 for line in fh: 14 for i, line in enumerate(fh):
15 if line.find('user_genome') < 0: 15 if i == 0:
16 items = line.split('\t') 16 # Skip header.
17 tax = items[1].strip() 17 continue
18 tax = tax.split(';')[-1].strip() 18 items = line.split('\t')
19 # split on GTDB species tag 19 tax = items[1].strip()
20 tax = tax.split('s__')[1].strip() 20 tax = tax.split(';')[-1].strip()
21 if len(tax) == 0: 21 # split on GTDB species tag
22 tax = '(Unknown Species)' 22 tax = tax.split('s__')[1].strip()
23 if len(tax) == 0:
24 tax = '(Unknown Species)'
23 return tax 25 return tax
24 26
25 27
26 def get_blast_genes(f): 28 def get_blast_genes(f):
27 # reads genes detected via BLAST 29 # reads genes detected via BLAST