Mercurial > repos > greg > lrn_risk
comparison lrn_risk.py @ 1:f98c92618a6c draft
Uploaded
author | greg |
---|---|
date | Fri, 28 Apr 2023 15:06:29 +0000 |
parents | 99e04eba4033 |
children | 8dc6d4aa17ec |
comparison
equal
deleted
inserted
replaced
0:99e04eba4033 | 1:f98c92618a6c |
---|---|
9 | 9 |
10 def get_species_from_gtdb(f): | 10 def get_species_from_gtdb(f): |
11 # get GTDB species | 11 # get GTDB species |
12 # assumes there is one genome in the GTDB-Tk output file | 12 # assumes there is one genome in the GTDB-Tk output file |
13 with open(f, 'r') as fh: | 13 with open(f, 'r') as fh: |
14 for line in fh: | 14 for i, line in enumerate(fh): |
15 if line.find('user_genome') < 0: | 15 if i == 0: |
16 items = line.split('\t') | 16 # Skip header. |
17 tax = items[1].strip() | 17 continue |
18 tax = tax.split(';')[-1].strip() | 18 items = line.split('\t') |
19 # split on GTDB species tag | 19 tax = items[1].strip() |
20 tax = tax.split('s__')[1].strip() | 20 tax = tax.split(';')[-1].strip() |
21 if len(tax) == 0: | 21 # split on GTDB species tag |
22 tax = '(Unknown Species)' | 22 tax = tax.split('s__')[1].strip() |
23 if len(tax) == 0: | |
24 tax = '(Unknown Species)' | |
23 return tax | 25 return tax |
24 | 26 |
25 | 27 |
26 def get_blast_genes(f): | 28 def get_blast_genes(f): |
27 # reads genes detected via BLAST | 29 # reads genes detected via BLAST |