annotate lib/galaxy/datatypes/metagenomics.py @ 0:e5c3175506b7 default tip

Initial tool configs for qiime, most need work.
author Jim Johnson <jj@umn.edu>
date Sun, 17 Jul 2011 10:30:11 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1 """
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
2 metagenomics datatypes
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
3 James E Johnson - University of Minnesota
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
4 for Mothur
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
5 """
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
6
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
7 import data
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
8 import logging, os, sys, time, tempfile, shutil, string, glob, re
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
9 import galaxy.model
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
10 from galaxy.datatypes import metadata
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
11 from galaxy.datatypes import tabular
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
12 from galaxy.datatypes import sequence
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
13 from galaxy.datatypes.metadata import MetadataElement
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
14 from galaxy.datatypes.tabular import Tabular
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
15 from galaxy.datatypes.sequence import Fasta
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
16 from galaxy import util
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
17 from galaxy.datatypes.images import Html
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
18 from sniff import *
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
19
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
20 log = logging.getLogger(__name__)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
21
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
22
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
23 ## Mothur Classes
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
24
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Otu( Tabular ):
    """Mothur otu (operational taxonomic unit) tabular datatype."""
    file_ext = 'otu'

    def sniff( self, filename ):
        """
        Determines whether the file is a otu (operational taxonomic unit) format

        Lines starting with '@' are skipped.  Every other line must be
        tab separated with at least two columns, and the integer in the second
        column must equal the number of columns that follow it:
            label<TAB>count[<TAB>value(1..count)]
        At most the first 5 such lines are checked.  Reading stops at EOF or
        at the first blank line.

        Returns True when at least one data line was checked and all checked
        lines are valid; returns False otherwise, including when the file
        cannot be opened or read.
        """
        # Open outside the guarded block so that a failed open() can never
        # reach a finally clause that closes a handle which was never bound.
        try:
            fh = open( filename )
        except (IOError, OSError):
            return False
        count = 0
        try:
            while count < 5:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (a blank line also ends the scan)
                if line[0] == '@':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) < 2:
                    return False
                try:
                    check = int(linePieces[1])
                except ValueError:
                    return False
                if check + 2 != len(linePieces):
                    return False
                count += 1
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not an otu file.
            return False
        finally:
            fh.close()
        return count > 0
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
62
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class OtuList(Otu):
    """Otu datatype registered under the 'list' extension; sniffing is inherited from Otu."""
    file_ext = 'list'
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
65
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Sabund( Otu ):
    """Otu datatype registered under the 'sabund' extension, with integer-only values."""
    file_ext = 'sabund'

    def sniff( self, filename ):
        """
        Determines whether the file is a otu (operational taxonomic unit) format
        label<TAB>count[<TAB>value(1..n)]

        Lines starting with '@' are skipped.  On every other line the second
        column must be an integer equal to the number of columns that follow
        it, and each of those following columns must itself be an integer.
        At most the first 5 such lines are checked.  Reading stops at EOF or
        at the first blank line.

        Returns True when at least one data line was checked and all checked
        lines are valid; returns False otherwise, including when the file
        cannot be opened or read.
        """
        # Open outside the guarded block so that a failed open() can never
        # reach a finally clause that closes a handle which was never bound.
        try:
            fh = open( filename )
        except (IOError, OSError):
            return False
        count = 0
        try:
            while count < 5:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (a blank line also ends the scan)
                if line[0] == '@':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) < 2:
                    return False
                try:
                    check = int(linePieces[1])
                    if check + 2 != len(linePieces):
                        return False
                    # Every value column has to parse as an integer.
                    for piece in linePieces[2:]:
                        int(piece)
                except ValueError:
                    return False
                count += 1
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not a sabund file.
            return False
        finally:
            fh.close()
        return count > 0
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
107
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Rabund(Sabund):
    """Sabund datatype registered under the 'rabund' extension; sniffing is inherited from Sabund."""
    file_ext = 'rabund'
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
110
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class GroupAbund( Otu ):
    """Otu datatype with an extra group column, registered under the 'grpabund' extension."""
    file_ext = 'grpabund'

    def init_meta( self, dataset, copy_from=None ):
        """Delegate metadata initialisation to Otu unchanged."""
        Otu.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip=1, max_data_lines = 100000, **kwd ):
        """
        Set metadata via Otu.set_meta, choosing skip from the first line of the file.

        When the dataset has data, skip is forced to 1 if the first line starts
        with the columns 'label' and 'Group' (a heading row) and to 0 otherwise.
        When the dataset has no data the caller's skip value is passed through.
        """
        # See if file starts with header line
        if dataset.has_data():
            # open() sits outside the try so a failure surfaces as the real
            # I/O error instead of an unbound-name error from the finally clause.
            fh = open( dataset.file_name )
            try:
                linePieces = fh.readline().strip().split('\t')
            finally:
                fh.close()
            # Slice comparison is safe for a first line with fewer than two columns.
            if linePieces[:2] == ['label', 'Group']:
                skip = 1
            else:
                skip = 0
        Otu.set_meta( self, dataset, overwrite, skip, max_data_lines, **kwd)

    def sniff( self, filename, vals_are_int=False):
        """
        Determines whether the file is a otu (operational taxonomic unit) Shared format
        label<TAB>group<TAB>count[<TAB>value(1..n)]
        The first line is column headings as of Mothur v 1.20

        Lines starting with '@' are skipped.  Every other line needs at least
        three tab separated columns.  The first such line is accepted without
        further checks when its first column is 'label'; all other lines must
        have an integer third column equal to the number of columns that
        follow it, and each following column must parse as int (when
        vals_are_int is true) or as float (otherwise).
        At most the first 5 lines are checked.  Reading stops at EOF or at
        the first blank line.

        Returns True when at least one line was checked and all checked lines
        are valid; returns False otherwise, including when the file cannot be
        opened or read.
        """
        log.info( "sniff GroupAbund vals_are_int %s", vals_are_int)
        # Open outside the guarded block so that a failed open() can never
        # reach a finally clause that closes a handle which was never bound.
        try:
            fh = open( filename )
        except (IOError, OSError):
            return False
        if vals_are_int:
            convert = int
        else:
            convert = float
        count = 0
        try:
            while count < 5:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (a blank line also ends the scan)
                if line[0] == '@':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) < 3:
                    return False
                # Only the very first counted line may be a heading row.
                if count > 0 or linePieces[0] != 'label':
                    try:
                        check = int(linePieces[2])
                        if check + 3 != len(linePieces):
                            return False
                        for piece in linePieces[3:]:
                            convert(piece)
                    except ValueError:
                        return False
                count += 1
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not a match.
            return False
        finally:
            fh.close()
        return count > 0
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
173
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class SharedRabund(GroupAbund):
    """GroupAbund datatype registered under the 'shared' extension."""
    file_ext = 'shared'

    def sniff(self, filename):
        """
        Determines whether the file is a otu (operational taxonomic unit) Shared format
        label<TAB>group<TAB>count[<TAB>value(1..n)]
        The first line is column headings as of Mothur v 1.20

        Runs the GroupAbund sniffer with vals_are_int=True, logs the outcome
        and returns it.
        """
        matched = GroupAbund.sniff(self, filename, True)
        log.info("is SharedRabund %s", matched)
        return matched
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
188
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
189
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class RelAbund(GroupAbund):
    """GroupAbund datatype registered under the 'relabund' extension."""
    file_ext = 'relabund'

    def sniff(self, filename):
        """
        Determines whether the file is a otu (operational taxonomic unit) Relative Abundance format
        label<TAB>group<TAB>count[<TAB>value(1..n)]
        The first line is column headings as of Mothur v 1.20

        Runs the GroupAbund sniffer with vals_are_int=False, logs the outcome
        and returns it.
        """
        matched = GroupAbund.sniff(self, filename, False)
        log.info("is RelAbund %s", matched)
        return matched
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
203
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class SecondaryStructureMap(Tabular):
    """Single-column tabular datatype registered under the 'map' extension."""
    file_ext = 'map'

    def __init__(self, **kwd):
        """Initialize secondary structure map datatype"""
        Tabular.__init__( self, **kwd )
        self.column_names = ['Map']

    def sniff( self, filename ):
        """
        Determines whether the file is a secondary structure map format
        A single column with an integer value which indicates the row that this row maps to.
        The mapping must be symmetric: if structMap[10] = 380 then structMap[380] = 10.

        Rows are numbered from 1.  A value that is zero, negative or equal to
        its own row number is not checked.  A value larger than its row number
        is remembered as a forward reference; a smaller positive value must
        point back at a row that referenced this one.  The whole file is read,
        stopping at EOF or at the first blank line.

        Returns True when at least one row was read, every row is an integer
        and every forward reference was answered by its partner row; returns
        False otherwise, including when the file cannot be opened or read.
        """
        # Open outside the guarded block so that a failed open() can never
        # reach a finally clause that closes a handle which was never bound.
        try:
            fh = open( filename )
        except (IOError, OSError):
            return False
        line_num = 0
        # row number -> partner row, for forward references not yet answered
        rowidxmap = {}
        try:
            while True:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (a blank line also ends the scan)
                line_num += 1
                try:
                    pointer = int(line)
                except ValueError:
                    return False
                if pointer > line_num:
                    rowidxmap[line_num] = pointer
                elif 0 < pointer < line_num:
                    # The earlier row must have pointed at this row; pop so
                    # that a second row cannot claim the same partner.
                    if rowidxmap.pop(pointer, None) != line_num:
                        return False
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not a map file.
            return False
        finally:
            fh.close()
        # Leftover entries are forward references whose partner row never appeared.
        return line_num > 0 and not rowidxmap
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
245
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class SequenceAlignment( Fasta ):
    """Mothur aligned-fasta datatype (file extension 'align')."""
    file_ext = 'align'

    def __init__(self, **kwd):
        """Initialize SequenceAlignment datatype"""
        Fasta.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is in Mothur align fasta format
        Each sequence line must be the same length

        Only the single line that follows each '>' header is measured.
        Scanning stops at the first non-empty line that is not a header
        (or at a header that is not followed by sequence data).

        Returns True when at least one sequence line was measured and all
        measured lines had the same length; False otherwise, including when
        the file cannot be opened or read.
        """
        fh = None
        try:
            fh = open( filename )
            # Deliberately not named 'len': rebinding that name made the
            # builtin len() uncallable and every sniff silently failed.
            seq_len = -1
            while True:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if not line:
                    continue # blank line, keep looking for a header
                if not line.startswith( '>' ):
                    break # we found a non-empty line, but its not a fasta header
                # The next line.strip() must not be '', nor startwith '>'
                line = fh.readline().strip()
                if line == '' or line.startswith( '>' ):
                    break
                if seq_len < 0:
                    seq_len = len(line)
                elif seq_len != len(line):
                    return False
            if seq_len > 0:
                return True
        except (IOError, OSError, UnicodeError):
            # An unreadable file is simply not this format.
            pass
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
        return False
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
285
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class AlignCheck( Tabular ):
    """Tabular datatype with one header line and eight fixed columns (a name column plus seven integer columns)."""
    file_ext = 'align.check'

    def __init__(self, **kwd):
        """Initialize AlignCheck datatype"""
        Tabular.__init__( self, **kwd )
        self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total']
        self.column_types = ['str','int','int','int','int','int','int','int']
        self.comment_lines = 1

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """
        Fill in the dataset metadata from the fixed column layout.

        Counts the lines of the dataset file (when it has data) and records
        one comment line, the remaining lines as data lines, and this
        datatype's column names and types.  The overwrite argument and any
        extra keywords are accepted but not used.
        """
        # Tabular.set_meta( self, dataset, overwrite = overwrite, first_line_is_header = True, skip = 1 )
        data_lines = 0
        if dataset.has_data():
            dataset_fh = open( dataset.file_name )
            try:
                for _ in dataset_fh:
                    data_lines += 1
            finally:
                # Close the handle even if reading fails part-way through.
                dataset_fh.close()
        dataset.metadata.comment_lines = 1
        # The first line is the header, so it is not counted as data.
        dataset.metadata.data_lines = data_lines - 1 if data_lines > 0 else 0
        dataset.metadata.column_names = self.column_names
        dataset.metadata.column_types = self.column_types
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
309
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class AlignReport(Tabular):
    """
    QueryName QueryLength TemplateName TemplateLength SearchMethod SearchScore AlignmentMethod QueryStart QueryEnd TemplateStart TemplateEnd PairwiseAlignmentLength GapsInQuery GapsInTemplate LongestInsert SimBtwnQuery&Template
    AY457915 501 82283 1525 kmer 89.07 needleman 5 501 1 499 499 2 0 0 97.6
    """
    file_ext = 'align.report'

    def __init__(self, **kwd):
        """Initialize AlignReport datatype"""
        Tabular.__init__( self, **kwd )
        # Column headers, in file order (see the sample in the class docstring).
        self.column_names = [
            'QueryName',
            'QueryLength',
            'TemplateName',
            'TemplateLength',
            'SearchMethod',
            'SearchScore',
            'AlignmentMethod',
            'QueryStart',
            'QueryEnd',
            'TemplateStart',
            'TemplateEnd',
            'PairwiseAlignmentLength',
            'GapsInQuery',
            'GapsInTemplate',
            'LongestInsert',
            'SimBtwnQuery&Template',
        ]
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
323
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class BellerophonChimera( Tabular ):
    """Tabular datatype with the four columns Name, Score, Left and Right."""
    file_ext = 'bellerophon.chimera'

    def __init__(self, **kwd):
        """Initialize BellerophonChimera datatype"""
        Tabular.__init__( self, **kwd )
        names = ['Name', 'Score', 'Left', 'Right']
        self.column_names = names
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
330
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class SecondaryStructureMatch(Tabular):
    """
    name pound dash plus equal loop tilde total
    9_1_12 42 68 8 28 275 420 872
    9_1_14 36 68 6 26 266 422 851
    9_1_15 44 68 8 28 276 418 873
    9_1_16 34 72 6 30 267 430 860
    9_1_18 46 80 2 36 261
    """

    def __init__(self, **kwd):
        """Initialize SecondaryStructureMatch datatype"""
        Tabular.__init__( self, **kwd )
        # Column headers, in the order shown in the class docstring sample.
        self.column_names = [
            'name',
            'pound',
            'dash',
            'plus',
            'equal',
            'loop',
            'tilde',
            'total',
        ]
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
344
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class DistanceMatrix(data.Text):
    """Text datatype (file extension 'dist') that declares a sequence_count metadata element."""
    file_ext = 'dist'

    # Add metadata elements
    MetadataElement(
        name="sequence_count",
        default=0,
        desc="Number of sequences",
        readonly=False,
        optional=True,
        no_value=0,
    )
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
349
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
350
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class LowerTriangleDistanceMatrix(DistanceMatrix):
    """Lower-triangle (phylip) distance matrix datatype (file extension 'lower.dist')."""
    file_ext = 'lower.dist'

    def __init__(self, **kwd):
        """Initialize lower-triangle distance matrix datatype"""
        DistanceMatrix.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is a lower-triangle distance matrix (phylip) format
        The first line has the number of sequences in the matrix.
        The remaining lines have the sequence name followed by a list of distances from all preceeding sequences
                5
                U68589
                U68590  0.3371
                U68591  0.3609  0.3782
                U68592  0.4155  0.3197  0.4148
                U68593  0.2872  0.1690  0.3361  0.2842

        At most the first five matrix rows are inspected.  Fields are split
        on any whitespace, so sequence names must not contain spaces.

        Returns True when the first line is a positive integer and every
        inspected row k (1-based) holds a name followed by k-1 numeric
        distances, with at least one row present; False otherwise, including
        when the file cannot be opened or read.
        """
        fh = None
        try:
            fh = open( filename )
            # Line 1: the sequence count on its own (int() raises ValueError otherwise).
            if int(fh.readline().strip()) <= 0:
                return False
            rows = 0
            while rows < 5:
                line = fh.readline().strip()
                if not line:
                    break #EOF
                if line[0] == '@':
                    continue # lines starting with '@' are ignored, as before
                pieces = line.split()
                # Row k carries the name plus k-1 distances, i.e. k fields.
                if len(pieces) != rows + 1:
                    return False
                for value in pieces[1:]:
                    float(value) # ValueError means this is not a distance
                rows += 1
            return rows > 0
        except (IOError, OSError, ValueError):
            # Unreadable or non-numeric content: not this format.
            return False
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
397
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class SquareDistanceMatrix(DistanceMatrix,Tabular):
    """Square distance matrix datatype (file extension 'square.dist')."""
    file_ext = 'square.dist'
    sequence_count = -1

    def __init__(self, **kwd):
        """Initialize square distance matrix datatype"""
        Tabular.__init__( self, **kwd )

    def init_meta( self, dataset, copy_from=None ):
        """Initialize metadata through data.Text.init_meta."""
        data.Text.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """Reset the dataset's 'sequences' metadata attribute to 0; the other arguments are unused."""
        # NOTE(review): the metadata element declared on DistanceMatrix is
        # named 'sequence_count', not 'sequences' - confirm which is intended.
        dataset.metadata.sequences = 0

    def sniff( self, filename ):
        """
        Determines whether the file is a square distance matrix (Column-formatted distance matrix) format
        The first line has the number of sequences in the matrix.
        The following lines have the sequence name in the first column plus a column for the distance to each sequence
        in the row order in which they appear in the matrix.
               3
               U68589  0.0000  0.3371  0.3610
               U68590  0.3371  0.0000  0.3783
               U68590  0.3371  0.0000  0.3783

        At most the first five matrix rows are inspected; rows are split on
        tab characters.

        Returns True when the first line is an integer N and every inspected
        row has N+1 tab-separated fields whose last N are numeric, with at
        least one row present; False otherwise, including when the file
        cannot be opened or read.
        """
        fh = None
        try:
            fh = open( filename )
            # int() raises ValueError when the first line is not a count.
            sequence_count = int(fh.readline().strip())
            # One name column plus one distance column per sequence.
            col_cnt = sequence_count + 1
            count = 0
            while count < 5:
                line = fh.readline().strip()
                if not line:
                    break #EOF
                if line[0] == '@':
                    continue # lines starting with '@' are ignored
                linePieces = line.split('\t')
                if len(linePieces) != col_cnt:
                    return False
                for piece in linePieces[1:]:
                    float(piece) # ValueError means this is not a distance
                count += 1
            return count > 0
        except (IOError, OSError, ValueError):
            # Unreadable or non-numeric content: not this format.
            return False
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
454
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class PairwiseDistanceMatrix(DistanceMatrix,Tabular):
    """Column-formatted (pairwise) distance matrix: two sequence names and a distance per row."""
    file_ext = 'pair.dist'

    def __init__(self, **kwd):
        """Initialize the tabular base and declare the three-column layout (sequence, sequence, distance)."""
        Tabular.__init__( self, **kwd )
        self.column_names = ['Sequence','Sequence','Distance']
        self.column_types = ['str','str','float']
        self.comment_lines = 1

    def sniff( self, filename ):
        """
        Determines whether the file is a pairwise distance matrix (Column-formatted distance matrix) format
        The first and second columns have the sequence names and the third column is the distance between those sequences.

        Lines whose first character is '@' are skipped.  Every other line must
        split on tabs into exactly three fields, the third of which must parse
        as a float.  The scan stops at the first empty (or whitespace-only)
        line, which is handled exactly like end of file.

        Returns True as soon as five rows have validated, or at the end of the
        scan when at least one row validated; returns False otherwise,
        including when the file cannot be opened or read.
        """
        # Bind fh before the try block so the finally clause can tell whether
        # open() succeeded; closing an unbound name would raise from finally.
        fh = None
        try:
            fh = open( filename )
            count = 0
            while True:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (or an empty line)
                if line[0] == '@':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) != 3:
                    return False
                try:
                    float(linePieces[2])
                except ValueError:
                    return False
                count += 1
                if count == 5:
                    return True
            return count > 0
        except (IOError, OSError, ValueError):
            # Unreadable or undecodable file: a sniffer reports "no match"
            # rather than raising.
            return False
        finally:
            if fh is not None:
                fh.close()
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
497
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class AlignCheck(Tabular):
    """Tabular datatype for 'align.check' files, which carry eight named columns."""
    file_ext = 'align.check'

    def __init__(self, **kwd):
        """Initialize the tabular base, then declare the column names and the column count."""
        Tabular.__init__(self, **kwd)
        labels = [
            'name', 'pound', 'dash', 'plus',
            'equal', 'loop', 'tilde', 'total',
        ]
        self.column_names = labels
        self.columns = len(labels)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
505
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Names(Tabular):
    """
    Tabular datatype for 'names' files.

    Column 1 holds a representative sequence name and column 2 the
    comma-separated sequences that it represents.
    """
    file_ext = 'names'

    def __init__(self, **kwd):
        """Initialize the tabular base, then declare the two-column layout."""
        Tabular.__init__(self, **kwd)
        labels = ['name', 'representatives']
        self.column_names = labels
        self.columns = len(labels)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
513
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Summary(Tabular):
    """
    Tabular datatype for 'summary' files, which summarize the quality of the
    sequences in an unaligned or aligned fasta-formatted sequence file.
    """
    file_ext = 'summary'

    def __init__(self, **kwd):
        """Initialize the tabular base, then declare the six-column layout."""
        Tabular.__init__(self, **kwd)
        labels = ['seqname', 'start', 'end', 'nbases', 'ambigs', 'polymer']
        self.column_names = labels
        self.columns = len(labels)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
521
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Group(Tabular):
    """Tabular datatype for 'groups' files: two columns, a name and its group."""
    file_ext = 'groups'

    def __init__(self, **kwd):
        """Initialize the tabular base, then declare the two-column layout."""
        Tabular.__init__(self, **kwd)
        labels = ['name', 'group']
        self.column_names = labels
        self.columns = len(labels)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
529
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Design(Tabular):
    """
    Tabular datatype for 'design' files.

    Column 1 holds a group and column 2 the grouping it belongs to, which
    provides a way to merge groups.
    """
    file_ext = 'design'

    def __init__(self, **kwd):
        """Initialize the tabular base, then declare the two-column layout."""
        Tabular.__init__(self, **kwd)
        labels = ['group', 'grouping']
        self.column_names = labels
        self.columns = len(labels)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
537
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class AccNos(Tabular):
    """Tabular datatype for 'accnos' files: a single-column list of names."""
    file_ext = 'accnos'

    def __init__(self, **kwd):
        """Initialize the tabular base, then declare the single 'name' column."""
        Tabular.__init__(self, **kwd)
        labels = ['name']
        self.column_names = labels
        self.columns = len(labels)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
545
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Oligos( data.Text ):
    """Text datatype for 'oligos' files (tab-separated forward/reverse/barcode lines)."""
    file_ext = 'oligos'

    def sniff( self, filename ):
        """
        Determines whether the file is in oligos format.

        Lines whose first character is '#' are skipped.  Every other line is
        split on tabs and must be either
          - two fields whose first field starts with 'forward' or 'reverse', or
          - three fields whose first field starts with 'barcode'.
        The scan stops at the first empty (or whitespace-only) line, which is
        handled exactly like end of file.

        Returns True once more than 20 lines have been accepted, or at the end
        of the scan when at least one line was accepted; returns False on the
        first line that fits neither shape, when nothing was accepted, or when
        the file cannot be opened or read.
        """
        # Bind fh before the try block so the finally clause can tell whether
        # open() succeeded; closing an unbound name would raise from finally.
        fh = None
        try:
            fh = open( filename )
            count = 0
            while True:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (or an empty line)
                if line[0] == '#':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) == 2 and re.match('forward|reverse',linePieces[0]):
                    count += 1
                elif len(linePieces) == 3 and re.match('barcode',linePieces[0]):
                    count += 1
                else:
                    return False
                # Checked after every accepted line so a long file is not
                # read to the end once enough evidence has been seen.
                if count > 20:
                    return True
            return count > 0
        except (IOError, OSError, ValueError):
            # Unreadable or undecodable file: a sniffer reports "no match"
            # rather than raising.
            return False
        finally:
            if fh is not None:
                fh.close()
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
581
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Frequency(Tabular):
    """Tabular datatype for 'freq' files: an integer position and a float frequency per row."""
    file_ext = 'freq'

    def __init__(self, **kwd):
        """Initialize the tabular base and declare the (position, frequency) columns."""
        Tabular.__init__( self, **kwd )
        self.column_names = ['position','frequency']
        self.column_types = ['int','float']

    def sniff( self, filename ):
        """
        Determines whether the file is a frequency tabular format for chimera analysis
        #1.14.0
        0	0.000
        1	0.000
        ...
        155	0.975

        Lines whose first character is '#' are skipped.  Every other line is
        split on tabs; its first field must parse as an int and its second as
        a float (any further fields are not examined).  The scan stops at the
        first empty (or whitespace-only) line, which is handled exactly like
        end of file.

        Returns True once more than 20 rows have validated, or at the end of
        the scan when at least one row validated; returns False otherwise,
        including when the file cannot be opened or read.
        """
        # Bind fh before the try block so the finally clause can tell whether
        # open() succeeded; closing an unbound name would raise from finally.
        fh = None
        try:
            fh = open( filename )
            count = 0
            while True:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (or an empty line)
                if line[0] == '#':
                    continue
                linePieces = line.split('\t')
                try:
                    int(linePieces[0])
                    float(linePieces[1])
                except (ValueError, IndexError):
                    # Non-numeric field, or fewer than two fields.
                    return False
                count += 1
                # Checked after every accepted row so a long file is not
                # read to the end once enough evidence has been seen.
                if count > 20:
                    return True
            return count > 0
        except (IOError, OSError, ValueError):
            # Unreadable or undecodable file: a sniffer reports "no match"
            # rather than raising.
            return False
        finally:
            if fh is not None:
                fh.close()
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
626
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Quantile(Tabular):
    """Tabular datatype for 'quan' files: quantiles for chimera analysis, one integer and six floats per row."""
    file_ext = 'quan'
    # The two descriptions were previously attached to the wrong element
    # ("filtered" described the mask and "masked" described the filter).
    MetadataElement( name="filtered", default=False, no_value=False, optional=True , desc="Quantiles calculated using a frequency filter", readonly=True)
    MetadataElement( name="masked", default=False, no_value=False, optional=True , desc="Quantiles calculated using a mask", readonly=True)

    def __init__(self, **kwd):
        """Quantiles for chimera analysis"""
        Tabular.__init__( self, **kwd )
        self.column_names = ['num','ten','twentyfive','fifty','seventyfive','ninetyfive','ninetynine']
        self.column_types = ['int','float','float','float','float','float','float']

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """
        Log the extra keyword arguments; no metadata is computed here.

        NOTE(review): this override does not delegate to Tabular.set_meta --
        confirm that skipping the base implementation is intentional.
        """
        log.info( "Mothur Quantile set_meta %s", kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is a quantiles tabular format for chimera analysis
        1	0	0	0	0	0	0
        2	0.309198	0.309198	0.37161	0.37161	0.37161	0.37161
        3	0.510982	0.563213	0.693529	0.858939	1.07442	1.20608
        ...

        Lines whose first character is '#' are skipped.  Every other line is
        split on tabs and must have at least seven fields: the first must
        parse as an int and the next six as floats (any further fields are
        not examined).  The scan stops at the first empty (or whitespace-only)
        line, which is handled exactly like end of file.

        Returns True once more than 10 rows have validated, or at the end of
        the scan when at least one row validated; returns False otherwise,
        including when the file cannot be opened or read.
        """
        # Bind fh before the try block so the finally clause can tell whether
        # open() succeeded; closing an unbound name would raise from finally.
        fh = None
        try:
            fh = open( filename )
            count = 0
            while True:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (or an empty line)
                if line[0] == '#':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) < 7:
                    return False
                try:
                    int(linePieces[0])
                    for piece in linePieces[1:7]:
                        float(piece)
                except ValueError:
                    return False
                count += 1
                # Checked after every accepted row so a long file is not
                # read to the end once enough evidence has been seen.
                if count > 10:
                    return True
            return count > 0
        except (IOError, OSError, ValueError):
            # Unreadable or undecodable file: a sniffer reports "no match"
            # rather than raising.
            return False
        finally:
            if fh is not None:
                fh.close()
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
678
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class FilteredQuantile(Quantile):
    """Quantile variant registered under the 'filtered.quan' extension."""
    file_ext = 'filtered.quan'

    def __init__(self, **kwd):
        """Run the Quantile initializer, then flag this instance as filtered."""
        Quantile.__init__(self, **kwd)
        self.filtered = True
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
685
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class MaskedQuantile(Quantile):
    """Quantile variant registered under the 'masked.quan' extension."""
    file_ext = 'masked.quan'

    def __init__(self, **kwd):
        """Run the Quantile initializer, then flag this instance as masked and not filtered."""
        Quantile.__init__(self, **kwd)
        self.masked, self.filtered = True, False
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
693
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class FilteredMaskedQuantile(Quantile):
    """Quantile variant registered under the 'filtered.masked.quan' extension."""
    file_ext = 'filtered.masked.quan'

    def __init__(self, **kwd):
        """Run the Quantile initializer, then flag this instance as both masked and filtered."""
        Quantile.__init__(self, **kwd)
        self.masked, self.filtered = True, True
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
701
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class LaneMask(data.Text):
    """Lane mask filter: a single line made up solely of the characters 0 and 1."""
    file_ext = 'filter'

    def sniff( self, filename ):
        """
        Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones.

        The file is read in 1000-character chunks so that a very long mask
        does not have to be held in memory at once.

        Returns True only when the file contains at least one '0'/'1'
        character, nothing but '0'/'1' characters before the first line
        terminator, and nothing but line terminators after it.  An empty
        file, or a file that cannot be opened or read, yields False.
        """
        fh = None
        try:
            fh = open( filename )
            seen_mask = False   # at least one 0/1 character has been read
            line_ended = False  # a line terminator has already been read
            while True:
                buff = fh.read(1000)
                if not buff:
                    break #EOF
                # Drop trailing terminators so the regex only sees mask characters;
                # a terminator in the middle of the chunk still makes the match fail.
                chunk = buff.rstrip('\r\n')
                if chunk:
                    # Any content after the first line ended means more than one line.
                    if line_ended or not re.match('^[01]+$', chunk):
                        return False
                    seen_mask = True
                if chunk != buff:
                    line_ended = True
            return seen_mask
        except Exception:
            # A sniffer is best-effort: an unreadable file is simply "not this type".
            return False
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
724
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class SequenceTaxonomy(Tabular):
    """
    A table with 2 columns:
    - SequenceName
    - Taxonomy (semicolon-separated taxonomy in descending order)
    Example:
    X56533.1        Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma;
    X97975.1        Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida;
    AF052717.1      Eukaryota;Parabasalidea;
    """
    file_ext = 'seq.taxonomy'

    def __init__(self, **kwd):
        """Initialize the Tabular base and label the two columns."""
        Tabular.__init__( self, **kwd )
        self.column_names = ['name','taxonomy']

    def sniff( self, filename ):
        """
        Determines whether the file is a SequenceTaxonomy

        Blank lines are skipped.  Every other line examined must split on
        tabs into exactly two fields, and the second field must be one or
        more taxon names, each terminated by ';' and optionally followed by
        a parenthesized integer.  Scanning stops once 11 lines have passed.

        Returns True when at least one line was checked and none failed;
        False otherwise, including when the file cannot be opened or read.
        """
        # Raw string so the backslash escapes are interpreted by the regex engine.
        pat = r'^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$'
        fh = None
        try:
            fh = open( filename )
            count = 0
            while True:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if line:
                    fields = line.split('\t')
                    if len(fields) != 2:
                        return False
                    if not re.match(pat,fields[1]):
                        return False
                    count += 1
                    if count > 10:
                        break
            if count > 0:
                return True
        except Exception:
            # A sniffer is best-effort: an unreadable file is simply "not this type".
            pass
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
        return False
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
769
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class RDPSequenceTaxonomy(SequenceTaxonomy):
    """
    A table with 2 columns:
    - SequenceName
    - Taxonomy (semicolon-separated taxonomy in descending order, RDP requires exactly 6 levels deep)
    Example:
    AB001518.1      Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales;
    AB001724.1      Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa;
    AB001774.1      Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila;

    NOTE(review): the first two example rows list only five levels, so they
    would be rejected by sniff(), which demands exactly six - confirm which
    of the two is intended.
    """
    file_ext = 'rdp.taxonomy'

    def sniff( self, filename ):
        """
        Determines whether the file is an RDPSequenceTaxonomy

        Blank lines are skipped.  Every other line examined must split on
        tabs into exactly two fields, and the second field must be exactly
        six taxon names, each terminated by ';' and optionally followed by
        a parenthesized integer.  Scanning stops once 11 lines have passed.

        Returns True when at least one line was checked and none failed;
        False otherwise, including when the file cannot be opened or read.
        """
        # Raw string so the backslash escapes are interpreted by the regex engine.
        pat = r'^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$'
        fh = None
        try:
            fh = open( filename )
            count = 0
            while True:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if line:
                    fields = line.split('\t')
                    if len(fields) != 2:
                        return False
                    if not re.match(pat,fields[1]):
                        return False
                    count += 1
                    if count > 10:
                        break
            if count > 0:
                return True
        except Exception:
            # A sniffer is best-effort: an unreadable file is simply "not this type".
            pass
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
        return False
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
810
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class ConsensusTaxonomy(Tabular):
    # Tabular datatype whose columns are labelled OTU, count and taxonomy.
    file_ext = 'cons.taxonomy'

    def __init__(self, **kwd):
        """Initialize the Tabular base, then label the three columns."""
        Tabular.__init__(self, **kwd)
        labels = ['OTU', 'count', 'taxonomy']
        self.column_names = labels
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
817
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class TaxonomySummary(Tabular):
    # Tabular datatype summarizing taxon classification, five labelled columns.
    file_ext = 'tax.summary'

    def __init__(self, **kwd):
        """Initialize the Tabular base, then label the five summary columns."""
        Tabular.__init__(self, **kwd)
        labels = [
            'taxlevel',
            'rankID',
            'taxon',
            'daughterlevels',
            'total',
        ]
        self.column_names = labels
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
824
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Phylip(data.Text):
    """Text data in Phylip format (see sniff for the layout that is checked)."""
    file_ext = 'phy'

    def sniff( self, filename ):
        """
        Determines whether the file is in Phylip format (Interleaved or Sequential)
        The first line of the input file contains the number of species and the
        number of characters, in free format, separated by blanks (not by
        commas). The information for each species follows, starting with a
        ten-character species name (which can include punctuation marks and blanks),
        and continuing with the characters for that species.
        http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles
        Interleaved Example:
            6   39
        Archaeopt CGATGCTTAC CGCCGATGCT
        HesperorniCGTTACTCGT TGTCGTTACT
        BaluchitheTAATGTTAAT TGTTAATGTT
        B. virginiTAATGTTCGT TGTTAATGTT
        BrontosaurCAAAACCCAT CATCAAAACC
        B.subtilisGGCAGCCAAT CACGGCAGCC

        TACCGCCGAT GCTTACCGC
        CGTTGTCGTT ACTCGTTGT
        AATTGTTAAT GTTAATTGT
        CGTTGTTAAT GTTCGTTGT
        CATCATCAAA ACCCATCAT
        AATCACGGCA GCCAATCAC

        Only the counts line is checked at present: returns True when the
        first two whitespace-separated tokens of the first line both parse
        as integers, False otherwise (including when the file cannot be
        opened or read).
        """
        fh = None
        try:
            fh = open( filename )
            # counts line
            linePieces = fh.readline().strip().split()
            # The values are parsed purely as validation; a missing or
            # non-integer token raises and is reported as "not Phylip".
            int(linePieces[0])
            int(linePieces[1])
            # TODO check data lines: the name is the first 10 characters of
            # each line, followed by 1-char nucleic base or amino acid
            # designators (spaces allowed) or a float per base (each
            # separated by space).
            return True
        except Exception:
            # A sniffer is best-effort: an unreadable file is simply "not this type".
            return False
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
878
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
879
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class Axes(Tabular):
    """Tabular datatype holding a name column followed by one float column per axis."""
    file_ext = 'axes'

    def __init__(self, **kwd):
        """Initialize axes datatype"""
        Tabular.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is an axes format
        The first line may have column headings.
        The following lines have the name in the first column plus float columns for each axis.
        ==> 98_sq_phylip_amazon.fn.unique.pca.axes <==
        group   axis1   axis2
        forest  0.000000        0.145743
        pasture 0.145743        0.000000

        ==> 98_sq_phylip_amazon.nmds.axes <==
                axis1   axis2
        U68589  0.262608        -0.077498
        U68590  0.027118        0.195197
        U68591  0.329854        0.014395

        How the check works: the first line is read and discarded.  The
        second line only fixes the expected number of tab-separated columns;
        its values are not inspected.  Every later line must have that many
        columns, and every column after the first must parse as a float.
        The first blank line is treated as end of input.

        Returns True as soon as 11 rows have been validated, or at end of
        input when at least one row was validated; False otherwise,
        including when the file cannot be opened or read.

        NOTE(review): because only float-checked rows are counted, a file
        holding just a heading line and one data row is rejected - confirm
        this is intended.
        """
        fh = None
        try:
            fh = open( filename )
            count = 0
            # Skip the first line: it may hold column headings, and its
            # column count can differ from that of the data rows.
            fh.readline()
            col_cnt = None
            while True:
                line = fh.readline().strip()
                if not line:
                    break #EOF (a blank line also ends the scan)
                fields = line.split('\t')
                if col_cnt is None:
                    # First line after the heading: establishes the column count only.
                    col_cnt = len(fields)
                else:
                    if len(fields) != col_cnt:
                        return False
                    try:
                        for i in range(1, col_cnt):
                            float(fields[i])
                    except ValueError:
                        return False
                    count += 1
                if count > 10:
                    return True
            if count > 0:
                return True
        except Exception:
            # A sniffer is best-effort: an unreadable file is simply "not this type".
            pass
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
        return False
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
935
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
936 ## Qiime Classes
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
937
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class QiimeMetadataMapping(Tabular):
    """
    QIIME metadata mapping file: information about the samples necessary
    to perform the data analysis.

    See http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview
    """
    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
    file_ext = 'qiimemapping'

    def __init__(self, **kwd):
        """
        Initialize the datatype by delegating to Tabular.

        http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview
        Information about the samples necessary to perform the data analysis.
        # self.column_names = ['#SampleID','BarcodeSequence','LinkerPrimerSequence','Description']
        """
        Tabular.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determine whether the file is a qiime mapping file.

        Only checks for an appropriate header line for now (could be improved):
        returns True as soon as a line starts with '#SampleID', followed by
        zero or more tab-separated identifier columns and a tab-separated
        'Description' column. Returns False when no line matches or when the
        file cannot be opened or read.
        """
        pat = '#SampleID(\t[a-zA-Z][a-zA-Z0-9_]*)*\tDescription'
        fh = None
        try:
            fh = open( filename )
            # Iterating the handle stops at EOF, so a file without a header
            # line ends the scan instead of looping forever.
            for line in fh:
                if re.match( pat, line ):
                    return True
        except Exception:
            # Sniffing is best-effort: an unreadable file is simply "not this type".
            pass
        finally:
            # fh stays None when open() itself failed.
            if fh is not None:
                fh.close()
        return False

    def set_column_names(self, dataset):
        """
        Store the header fields in dataset.metadata.column_names.

        Reads only the first line of the dataset file; the metadata is left
        untouched unless that line starts with '#SampleID'.
        """
        if dataset.has_data():
            dataset_fh = open( dataset.file_name )
            try:
                line = dataset_fh.readline()
            finally:
                # Close the handle even if the read fails.
                dataset_fh.close()
            if line.startswith('#SampleID'):
                dataset.metadata.column_names = line.strip().split('\t')

    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ):
        """
        Run the standard Tabular metadata detection, then record the column names.
        """
        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
        self.set_column_names(dataset)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
979
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class QiimeOTU(Tabular):
    """
    QIIME OTU map: associates OTUs with sequence IDs.

    Each row starts with an OTU identifier followed by the IDs of the
    sequences assigned to that OTU, so rows may differ in length.

    Example:
        0 FLP3FBN01C2MYD FLP3FBN01B2ALM
        1 FLP3FBN01DF6NE FLP3FBN01CKW1J FLP3FBN01CHVM4
        2 FLP3FBN01AXQ2Z
    """
    file_ext = 'qiimeotu'
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
989
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class QiimeOTUTable(Tabular):
    """
    QIIME OTU table: per-sample counts for each OTU, optionally followed by
    a consensus lineage column.

    Example:
        #Full OTU Counts
        #OTU ID PC.354 PC.355 PC.356 Consensus Lineage
        0 0 1 0 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales
        1 1 3 1 Root;Bacteria
        2 0 2 2 Root;Bacteria;Bacteroidetes
    """
    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
    file_ext = 'qiimeotutable'

    def init_meta( self, dataset, copy_from=None ):
        """Initialize metadata by delegating to Tabular.init_meta."""
        tabular.Tabular.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """
        Set metadata from the header lines only.

        Tabular.set_meta is not called here; overwrite, skip and any extra
        keyword arguments are accepted but unused.
        """
        self.set_column_names(dataset)

    def set_column_names(self, dataset):
        """
        Store the column names found on the second line of the file.

        The first line is skipped (a title line in the example above).
        column_names is only updated when the second line starts with
        '#OTU ID'; comment_lines is set to 2 whenever the dataset has data.
        """
        if dataset.has_data():
            dataset_fh = open( dataset.file_name )
            try:
                dataset_fh.readline()  # discard the first (title) line
                line = dataset_fh.readline()
            finally:
                # Close the handle even if a read fails.
                dataset_fh.close()
            if line.startswith('#OTU ID'):
                dataset.metadata.column_names = line.strip().split('\t')
            dataset.metadata.comment_lines = 2
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1013
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class QiimeDistanceMatrix(Tabular):
    """
    QIIME distance matrix: the first line lists the sample names and each
    following row holds a sample name and its distances to every sample.

    Example:
         PC.354 PC.355 PC.356
        PC.354 0.0 3.177 1.955
        PC.355 3.177 0.0 3.444
        PC.356 1.955 3.444 0.0
    """
    file_ext = 'qiimedistmat'

    def init_meta( self, dataset, copy_from=None ):
        """Initialize metadata by delegating to Tabular.init_meta."""
        tabular.Tabular.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """
        Set metadata from the header line only.

        Tabular.set_meta is not called here; overwrite, skip and any extra
        keyword arguments are accepted but unused.
        """
        self.set_column_names(dataset)

    def set_column_names(self, dataset):
        """
        Store the names from the first line in dataset.metadata.column_names
        and mark that single line as a comment line.
        """
        # NOTE(review): this class declares no "column_names" MetadataElement,
        # although the attribute is assigned below - confirm that is intended.
        if dataset.has_data():
            dataset_fh = open( dataset.file_name )
            try:
                # first line contains the names
                line = dataset_fh.readline()
            finally:
                # Close the handle even if the read fails.
                dataset_fh.close()
            dataset.metadata.column_names = line.strip().split('\t')
            dataset.metadata.comment_lines = 1
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1034
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class QiimePCA(Tabular):
    """
    QIIME Principal Coordinate Analysis data.

    Rows are samples and columns are the principal coordinate (PC) axes.
    Pairs of PCs can then be graphed to view the relationships between samples.
    The bottom of the output file contains the eigenvalues and the
    % variation explained for each PC.

    Example:
        pc vector number 1 2 3
        PC.354 -0.309063936588 0.0398252112257 0.0744672231759
        PC.355 -0.106593922619 0.141125998277 0.0780204374172
        PC.356 -0.219869362955 0.00917241121781 0.0357281314115


        eigvals 0.480220500471 0.163567082874 0.125594470811
        % variation explained 51.6955484555 17.6079322939
    """
    file_ext = 'qiimepca'
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1052
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class QiimeParams(Tabular):
    """
    QIIME parameters file: one "script:option value" setting per line,
    with '#' lines used as comments.

    Example:
        ###pick_otus_through_otu_table.py parameters###

        # OTU picker parameters
        pick_otus:otu_picking_method uclust
        pick_otus:clustering_algorithm furthest

        # Representative set picker parameters
        pick_rep_set:rep_set_picking_method first
        pick_rep_set:sort_by otu
    """
    file_ext = 'qiimeparams'
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1066
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class QiimePrefs(data.Text):
    """
    QIIME preferences: a text file containing coloring preferences to be used
    by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py.

    Example:
        {
            'background_color':'black',

            'sample_coloring':
                {
                    'Treatment':
                        {
                            'column':'Treatment',
                            'colors':(('red',(0,100,100)),('blue',(240,100,100)))
                        },
                    'DOB':
                        {
                            'column':'DOB',
                            'colors':(('red',(0,100,100)),('blue',(240,100,100)))
                        }
                },
            'MONTE_CARLO_GROUP_DISTANCES':
                {
                    'Treatment': 10,
                    'DOB': 10
                }
        }
    """
    file_ext = 'qiimeprefs'
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1095
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
class QiimeTaxaSummary(Tabular):
    """
    QIIME taxa summary: one row per taxon with a value for each sample.

    Example:
        Taxon PC.354 PC.355 PC.356
        Root;Bacteria;Actinobacteria 0.0 0.177 0.955
        Root;Bacteria;Firmicutes 0.177 0.0 0.444
        Root;Bacteria;Proteobacteria 0.955 0.444 0.0
    """
    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
    file_ext = 'qiimetaxsummary'

    def set_column_names(self, dataset):
        """
        Store the header fields in dataset.metadata.column_names.

        Reads only the first line of the dataset file; the metadata is left
        untouched unless that line starts with 'Taxon'.
        """
        if dataset.has_data():
            dataset_fh = open( dataset.file_name )
            try:
                line = dataset_fh.readline()
            finally:
                # Close the handle even if the read fails.
                dataset_fh.close()
            if line.startswith('Taxon'):
                dataset.metadata.column_names = line.strip().split('\t')

    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ):
        """
        Run the standard Tabular metadata detection, then record the column names.
        """
        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
        self.set_column_names(dataset)
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1117
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    import doctest
    import sys
    doctest.testmod(sys.modules[__name__])
e5c3175506b7 Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1121