diff lrn_risk.py @ 1:f98c92618a6c draft

Uploaded
author greg
date Fri, 28 Apr 2023 15:06:29 +0000
parents 99e04eba4033
children 8dc6d4aa17ec
line wrap: on
line diff
--- a/lrn_risk.py	Thu Apr 27 19:22:36 2023 +0000
+++ b/lrn_risk.py	Fri Apr 28 15:06:29 2023 +0000
@@ -11,15 +11,17 @@
     # get GTDB species
     # assumes there is one genome in the GTDB-Tk output file
     with open(f, 'r') as fh:
-        for line in fh:
-            if line.find('user_genome') < 0:
-                items = line.split('\t')
-                tax = items[1].strip()
-                tax = tax.split(';')[-1].strip()
-                # split on GTDB species tag
-                tax = tax.split('s__')[1].strip()
-                if len(tax) == 0:
-                    tax = '(Unknown Species)'
+        for i, line in enumerate(fh):
+            if i == 0:
+                # Skip header.
+                continue
+            items = line.split('\t')
+            tax = items[1].strip()
+            tax = tax.split(';')[-1].strip()
+            # split on GTDB species tag
+            tax = tax.split('s__')[1].strip()
+            if len(tax) == 0:
+                tax = '(Unknown Species)'
     return tax