Mercurial > repos > jjohnson > qiime
annotate lib/galaxy/datatypes/metagenomics.py @ 0:e5c3175506b7 default tip
Initial tool configs for qiime, most need work.
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Sun, 17 Jul 2011 10:30:11 -0500 |
parents | |
children |
rev | line source |
---|---|
0
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1 """ |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
2 metagenomics datatypes |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
3 James E Johnson - University of Minnesota |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
4 for Mothur |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
5 """ |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
6 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
7 import data |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
8 import logging, os, sys, time, tempfile, shutil, string, glob, re |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
9 import galaxy.model |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
10 from galaxy.datatypes import metadata |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
11 from galaxy.datatypes import tabular |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
12 from galaxy.datatypes import sequence |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
13 from galaxy.datatypes.metadata import MetadataElement |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
14 from galaxy.datatypes.tabular import Tabular |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
15 from galaxy.datatypes.sequence import Fasta |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
16 from galaxy import util |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
17 from galaxy.datatypes.images import Html |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
18 from sniff import * |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
19 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
# Module-level logger, named after this module; used by the sniffers below.
log = logging.getLogger(__name__)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
21 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
22 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
23 ## Mothur Classes |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
24 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Otu( Tabular ):
    """Tabular datatype for Mothur otu (operational taxonomic unit) files."""
    file_ext = 'otu'

    def sniff( self, filename ):
        """
        Determines whether the file is a otu (operational taxonomic unit) format
          label<TAB>count[<TAB>value(1..n)]

        Lines starting with '@' are ignored.  Every other line must be
        tab-separated, with an integer in the second column that equals the
        number of columns following it.  At most the first 5 such lines are
        checked, and the scan stops at the first empty line or at end of file.

        Returns True when at least one line was checked and every checked line
        was valid.  Returns False otherwise, including when the file cannot be
        opened or read.
        """
        fh = None  # lets the finally clause cope with a failed open()
        try:
            fh = open( filename )
            count = 0
            while count < 5:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (a whitespace-only line also ends the scan)
                if line[0] == '@':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) < 2:
                    return False
                # int() raises ValueError for a non-numeric count; handled below.
                if int(linePieces[1]) + 2 != len(linePieces):
                    return False
                count += 1
            return count > 0
        except (IOError, OSError, ValueError):
            # Unreadable file or malformed count column: not this format.
            return False
        finally:
            if fh is not None:
                fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
62 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class OtuList( Otu ):
    """Otu subclass using the 'list' file extension; sniffing is inherited from Otu."""
    file_ext = 'list'
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
65 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Sabund( Otu ):
    """Otu subclass ('sabund' extension) whose value columns must all be integers."""
    file_ext = 'sabund'

    def sniff( self, filename ):
        """
        Determines whether the file is a otu (operational taxonomic unit) format
          label<TAB>count[<TAB>value(1..n)]

        Lines starting with '@' are ignored.  Every other line must be
        tab-separated, with an integer count in the second column that equals
        the number of columns following it, and each of those following columns
        must parse as an integer.  At most the first 5 such lines are checked,
        and the scan stops at the first empty line or at end of file.

        Returns True when at least one line was checked and every checked line
        was valid.  Returns False otherwise, including when the file cannot be
        opened or read.
        """
        fh = None  # lets the finally clause cope with a failed open()
        try:
            fh = open( filename )
            count = 0
            while count < 5:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (a whitespace-only line also ends the scan)
                if line[0] == '@':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) < 2:
                    return False
                # int() raises ValueError for non-numeric fields; handled below.
                if int(linePieces[1]) + 2 != len(linePieces):
                    return False
                for piece in linePieces[2:]:
                    int(piece)
                count += 1
            return count > 0
        except (IOError, OSError, ValueError):
            # Unreadable file or a non-integer field: not this format.
            return False
        finally:
            if fh is not None:
                fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
107 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Rabund( Sabund ):
    """Sabund subclass using the 'rabund' file extension; sniffing is inherited from Sabund."""
    file_ext = 'rabund'
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
110 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class GroupAbund( Otu ):
    """
    Otu subclass ('grpabund' extension) for per-group abundance tables:
      label<TAB>group<TAB>count[<TAB>value(1..n)]
    The first line may be a column-heading line.
    """
    file_ext = 'grpabund'

    def init_meta( self, dataset, copy_from=None ):
        """Initialize metadata by delegating to Otu.init_meta."""
        Otu.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip=1, max_data_lines = 100000, **kwd ):
        """
        Set metadata via Otu.set_meta, skipping a leading column-heading line.

        When the dataset has data, the skip argument is replaced: 1 if the
        first two tab-separated columns of the first line are 'label' and
        'Group', otherwise 0.  Errors opening or reading the file propagate
        to the caller.
        """
        # See if file starts with header line
        if dataset.has_data():
            # Open outside the try so a failed open() raises its own error
            # instead of being masked by closing an unbound handle.
            fh = open( dataset.file_name )
            try:
                linePieces = fh.readline().strip().split('\t')
            finally:
                fh.close()
            # Comparing a slice is safe when the line has fewer than 2 columns.
            if linePieces[:2] == ['label', 'Group']:
                skip = 1
            else:
                skip = 0
        Otu.set_meta( self, dataset, overwrite, skip, max_data_lines, **kwd)

    def sniff( self, filename, vals_are_int=False):
        """
        Determines whether the file is a otu (operational taxonomic unit) Shared format
          label<TAB>group<TAB>count[<TAB>value(1..n)]
        The first line is column headings as of Mothur v 1.20

        Lines starting with '@' are ignored.  Every other line needs at least
        3 tab-separated columns.  Except for a first line whose first column is
        'label' (the heading line), the third column must be an integer equal
        to the number of columns following it, and each following column must
        parse as an int when vals_are_int is true, otherwise as a float.  At
        most 5 lines are examined (the heading line counts as one), and the
        scan stops at the first empty line or at end of file.

        Returns True when at least one line was examined and none was invalid.
        Returns False otherwise, including when the file cannot be opened or
        read.
        """
        log.info( "sniff GroupAbund vals_are_int %s", vals_are_int )
        if vals_are_int:
            to_number = int
        else:
            to_number = float
        fh = None  # lets the finally clause cope with a failed open()
        try:
            fh = open( filename )
            count = 0
            while count < 5:
                line = fh.readline().strip()
                if not line:
                    break  # EOF (a whitespace-only line also ends the scan)
                if line[0] == '@':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) < 3:
                    return False
                # Only the very first examined line may be a heading line.
                if count > 0 or linePieces[0] != 'label':
                    # Conversions raise ValueError on bad fields; handled below.
                    if int(linePieces[2]) + 3 != len(linePieces):
                        return False
                    for piece in linePieces[3:]:
                        to_number(piece)
                count += 1
            return count > 0
        except (IOError, OSError, ValueError):
            # Unreadable file or a non-numeric field: not this format.
            return False
        finally:
            if fh is not None:
                fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
173 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class SharedRabund( GroupAbund ):
    """GroupAbund subclass ('shared' extension) sniffed with integer value checking."""
    file_ext = 'shared'

    def sniff( self, filename ):
        """
        Determines whether the file is a otu (operational taxonomic unit) Shared format
          label<TAB>group<TAB>count[<TAB>value(1..n)]
        The first line is column headings as of Mothur v 1.20

        Delegates to GroupAbund.sniff with vals_are_int set to True, logs the
        outcome and returns it.
        """
        matched = GroupAbund.sniff( self, filename, vals_are_int=True )
        log.info( "is SharedRabund %s", matched )
        return matched
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
188 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
189 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class RelAbund( GroupAbund ):
    """GroupAbund subclass ('relabund' extension) sniffed with float value checking."""
    file_ext = 'relabund'

    def sniff( self, filename ):
        """
        Determines whether the file is a otu (operational taxonomic unit) Relative Abundance format
          label<TAB>group<TAB>count[<TAB>value(1..n)]
        The first line is column headings as of Mothur v 1.20

        Delegates to GroupAbund.sniff with vals_are_int set to False, logs the
        outcome and returns it.
        """
        matched = GroupAbund.sniff( self, filename, vals_are_int=False )
        log.info( "is RelAbund %s", matched )
        return matched
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
203 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
204 class SecondaryStructureMap(Tabular): |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
205 file_ext = 'map' |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
206 def __init__(self, **kwd): |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
207 """Initialize secondary structure map datatype""" |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
208 Tabular.__init__( self, **kwd ) |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
209 self.column_names = ['Map'] |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
210 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
211 def sniff( self, filename ): |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
212 """ |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
213 Determines whether the file is a secondary structure map format |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
214 A single column with an integer value which indicates the row that this row maps to. |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
215 check you make sure is structMap[10] = 380 then structMap[380] = 10. |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
216 """ |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
217 try: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
218 fh = open( filename ) |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
219 line_num = 0 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
220 rowidxmap = {} |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
221 while True: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
222 line = fh.readline() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
223 line_num += 1 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
224 line = line.strip() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
225 if not line: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
226 break #EOF |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
227 if line: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
228 try: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
229 pointer = int(line) |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
230 if pointer > 0: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
231 if pointer > line_num: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
232 rowidxmap[line_num] = pointer |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
233 elif pointer < line_num & rowidxmap[pointer] != line_num: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
234 return False |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
235 except ValueError: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
236 return False |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
237 fh.close() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
238 if count < 5 and count > 0: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
239 return True |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
240 except: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
241 pass |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
242 finally: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
243 fh.close() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
244 return False |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
245 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class SequenceAlignment( Fasta ):
    """
    Mothur aligned-fasta datatype (file extension 'align').

    An aligned fasta file is a fasta file in which each sequence line has
    the same length.
    """
    file_ext = 'align'

    def __init__(self, **kwd):
        """Initialize SequenceAlignment datatype"""
        Fasta.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is in Mothur align fasta format
        Each sequence line must be the same length

        Blank lines are skipped.  For every '>' header line, the line that
        immediately follows it is taken as the sequence line; its length
        must equal the length of the first sequence line seen.  Scanning
        stops at the first non-blank line that is not a header (for
        example the second line of a wrapped sequence), at a header that
        is not followed by sequence data, or at end of file.

        Returns True if at least one sequence line was seen and no length
        mismatch was found; returns False otherwise, including when the
        file cannot be opened or read.
        """
        try:
            fh = open( filename )
        except (IOError, OSError):
            # Without this guard the 'finally' below would fail on an unbound fh.
            return False
        try:
            # Length of the first sequence line; -1 until one has been seen.
            # (Must not be called 'len': that would hide the builtin used below.)
            seq_len = -1
            while True:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if not line:
                    continue #skip blank lines
                if not line.startswith( '>' ):
                    break #we found a non-empty line, but its not a fasta header
                #The next line.strip() must not be '', nor startwith '>'
                line = fh.readline().strip()
                if line == '' or line.startswith( '>' ):
                    break
                if seq_len < 0:
                    seq_len = len(line)
                elif seq_len != len(line):
                    return False
            return seq_len > 0
        except (IOError, OSError, ValueError):
            # Read or decode failure: a sniffer reports "no match" rather than raising.
            return False
        finally:
            fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
285 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class AlignCheck( Tabular ):
    """
    Tabular datatype with file extension 'align.check'.

    The first line of the file is treated as a header (comment) line; the
    remaining lines are data rows with the fixed columns listed in
    column_names.
    """
    file_ext = 'align.check'

    def __init__(self, **kwd):
        """Initialize AlignCheck datatype"""
        Tabular.__init__( self, **kwd )
        self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total']
        self.column_types = ['str','int','int','int','int','int','int','int']
        self.comment_lines = 1

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """
        Set the line-count and column metadata on dataset.

        Tabular.set_meta() is not invoked here; the metadata is assigned
        directly from a line count and the fixed column definitions.

        dataset   -- object providing has_data(), file_name and metadata
        overwrite -- accepted for interface compatibility; not used here
        """
        line_count = 0
        if dataset.has_data():
            dataset_fh = open( dataset.file_name )
            try:
                for _ in dataset_fh:
                    line_count += 1
            finally:
                # Close the handle even if reading fails part-way through.
                dataset_fh.close()
        dataset.metadata.comment_lines = 1
        # Every line except the single header line counts as data; never negative.
        dataset.metadata.data_lines = max( line_count - 1, 0 )
        dataset.metadata.column_names = self.column_names
        dataset.metadata.column_types = self.column_types
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
309 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class AlignReport(Tabular):
    """
    Tabular datatype with file extension 'align.report'.

    Example header line and data row:
    QueryName QueryLength TemplateName TemplateLength SearchMethod SearchScore AlignmentMethod QueryStart QueryEnd TemplateStart TemplateEnd PairwiseAlignmentLength GapsInQuery GapsInTemplate LongestInsert SimBtwnQuery&Template
    AY457915 501 82283 1525 kmer 89.07 needleman 5 501 1 499 499 2 0 0 97.6
    """
    file_ext = 'align.report'

    def __init__(self, **kwd):
        """Initialize AlignReport datatype and its fixed column names"""
        Tabular.__init__( self, **kwd )
        names = [
            'QueryName',
            'QueryLength',
            'TemplateName',
            'TemplateLength',
            'SearchMethod',
            'SearchScore',
            'AlignmentMethod',
            'QueryStart',
            'QueryEnd',
            'TemplateStart',
            'TemplateEnd',
            'PairwiseAlignmentLength',
            'GapsInQuery',
            'GapsInTemplate',
            'LongestInsert',
            'SimBtwnQuery&Template',
        ]
        self.column_names = names
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
323 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class BellerophonChimera( Tabular ):
    """
    Tabular datatype with file extension 'bellerophon.chimera'.

    Columns: Name, Score, Left, Right.
    """
    file_ext = 'bellerophon.chimera'

    def __init__(self, **kwd):
        """Initialize BellerophonChimera datatype and its fixed column names"""
        Tabular.__init__( self, **kwd )
        names = [
            'Name',
            'Score',
            'Left',
            'Right',
        ]
        self.column_names = names
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
330 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class SecondaryStructureMatch(Tabular):
    """
    Tabular datatype with eight named columns.

    Example header line and data rows:
    name pound dash plus equal loop tilde total
    9_1_12 42 68 8 28 275 420 872
    9_1_14 36 68 6 26 266 422 851
    9_1_15 44 68 8 28 276 418 873
    9_1_16 34 72 6 30 267 430 860
    """
    # NOTE(review): no file_ext is declared on this class, unlike the other
    # Tabular datatypes next to it - confirm whether that is intentional.

    def __init__(self, **kwd):
        """Initialize SecondaryStructureMatch datatype"""
        Tabular.__init__( self, **kwd )
        names = [
            'name',
            'pound',
            'dash',
            'plus',
            'equal',
            'loop',
            'tilde',
            'total',
        ]
        self.column_names = names
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
344 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class DistanceMatrix(data.Text):
    """
    Text datatype with file extension 'dist'.

    Declares one metadata element, sequence_count.
    """
    file_ext = 'dist'

    # Add metadata elements
    MetadataElement(
        name="sequence_count",
        default=0,
        desc="Number of sequences",
        readonly=False,
        optional=True,
        no_value=0
    )
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
349 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
350 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class LowerTriangleDistanceMatrix(DistanceMatrix):
    """
    Lower-triangle (phylip) distance matrix datatype, file extension
    'lower.dist'.
    """
    file_ext = 'lower.dist'

    def __init__(self, **kwd):
        """Initialize lower-triangle distance matrix datatype"""
        DistanceMatrix.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is a lower-triangle distance matrix (phylip) format
        The first line has the number of sequences in the matrix.
        The remaining lines have the sequence name followed by a list of distances from all preceding sequences
                5
                U68589
                U68590  0.3371
                U68591  0.3609  0.3782
                U68592  0.4155  0.3197  0.4148
                U68593  0.2872  0.1690  0.3361  0.2842

        Fields are tab-separated.  At most the first 5 matrix rows are
        examined: row N (counting from 1) must consist of a name followed
        by exactly N-1 fields that all parse as floats, and there must not
        be more rows than the declared sequence count.  Lines starting
        with '@' are skipped, and a blank line ends the scan.

        Returns True if the header is a positive integer and at least one
        row was checked without a violation; returns False otherwise,
        including when the file cannot be opened or read.
        """
        max_rows = 5
        try:
            fh = open( filename )
        except (IOError, OSError):
            # Without this guard the 'finally' below would fail on an unbound fh.
            return False
        try:
            # int() raises ValueError on an empty or non-numeric header line.
            sequence_count = int( fh.readline().strip() )
            if sequence_count < 1:
                return False
            rows = 0
            while rows < max_rows:
                line = fh.readline().strip()
                if not line:
                    break #EOF
                if line[0] == '@':
                    continue
                pieces = line.split('\t')
                # One name field plus one distance per preceding row.
                if len(pieces) != rows + 1:
                    return False
                for piece in pieces[1:]:
                    float(piece) # ValueError => not a distance
                rows += 1
                if rows > sequence_count:
                    return False
            return rows > 0
        except (IOError, OSError, ValueError):
            # Bad header, bad distance, or read/decode failure: not a match.
            return False
        finally:
            fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
397 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class SquareDistanceMatrix(DistanceMatrix,Tabular):
    """
    Square (column-formatted) distance matrix datatype, file extension
    'square.dist'.
    """
    file_ext = 'square.dist'
    sequence_count = -1

    def __init__(self, **kwd):
        """Initialize square distance matrix datatype"""
        Tabular.__init__( self, **kwd )

    def init_meta( self, dataset, copy_from=None ):
        """Initialize metadata by delegating directly to data.Text.init_meta."""
        data.Text.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """Reset the 'sequences' metadata value on dataset to 0."""
        # NOTE(review): the metadata element declared on DistanceMatrix is
        # named 'sequence_count', not 'sequences' - confirm which name is
        # intended before relying on this value.
        dataset.metadata.sequences = 0

    def sniff( self, filename ):
        """
        Determines whether the file is a square distance matrix (Column-formatted distance matrix) format
        The first line has the number of sequences in the matrix.
        The following lines have the sequence name in the first column plus a column for the distance to each sequence
        in the row order in which they appear in the matrix.
               3
               U68589  0.0000  0.3371  0.3610
               U68590  0.3371  0.0000  0.3783
               U68591  0.3610  0.3783  0.0000

        Fields are tab-separated.  At most the first 5 matrix rows are
        examined: each must have exactly sequence count + 1 fields, and
        every field after the name must parse as a float.  Lines starting
        with '@' are skipped, and a blank line ends the scan.

        Returns True if the header is a positive integer and at least one
        row was checked without a violation; returns False otherwise,
        including when the file cannot be opened or read.
        """
        max_rows = 5
        try:
            fh = open( filename )
        except (IOError, OSError):
            # Without this guard the 'finally' below would fail on an unbound fh.
            return False
        try:
            # int() raises ValueError on an empty or non-numeric header line.
            sequence_count = int( fh.readline().strip() )
            if sequence_count < 1:
                return False
            # One name column plus one distance column per sequence.
            col_cnt = sequence_count + 1
            count = 0
            while True:
                line = fh.readline().strip()
                if not line:
                    break #EOF
                if line[0] == '@':
                    continue
                linePieces = line.split('\t')
                if len(linePieces) != col_cnt:
                    return False
                for piece in linePieces[1:]:
                    float(piece) # ValueError => not a distance
                count += 1
                if count == max_rows:
                    return True
            return count > 0
        except (IOError, OSError, ValueError):
            # Bad header, bad distance, or read/decode failure: not a match.
            return False
        finally:
            fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
454 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class PairwiseDistanceMatrix(DistanceMatrix,Tabular):
    """Column-formatted distance matrix: two sequence names and a distance per row."""
    file_ext = 'pair.dist'

    def __init__(self, **kwd):
        """Initialize the pairwise distance matrix datatype and declare its three columns."""
        Tabular.__init__( self, **kwd )
        self.column_names = ['Sequence','Sequence','Distance']
        self.column_types = ['str','str','float']
        self.comment_lines = 1

    def sniff( self, filename ):
        """
        Determines whether the file is a pairwise distance matrix (Column-formatted distance matrix) format
        The first and second columns have the sequence names and the third column is the distance between those sequences.

        Lines starting with '@' are skipped.  Every other line must have exactly
        three tab-separated fields, the third of which parses as a float.
        Returns True once five such lines have been seen, or at the end of the
        data if at least one was seen; returns False otherwise, including when
        the file cannot be opened or read.
        """
        count = 0
        try:
            # 'with' guarantees the handle is closed on every exit path and,
            # unlike a 'finally: fh.close()', cannot fail on an unbound name
            # when open() itself raises.
            with open( filename ) as fh:
                while True:
                    line = fh.readline().strip()
                    if not line:
                        # Empty after stripping: end of file, or a blank line,
                        # which has always ended the scan as well.
                        break
                    if line[0] == '@':
                        continue
                    linePieces = line.split('\t')
                    if len(linePieces) != 3:
                        return False
                    try:
                        float(linePieces[2])
                    except ValueError:
                        return False
                    count += 1
                    if count == 5:
                        return True
        except Exception:
            # Sniffing is best effort: a file that cannot be read is simply
            # reported as "not this format".
            return False
        return count > 0
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
497 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class AlignCheck(Tabular):
    """Tabular datatype for 'align.check' files."""
    file_ext = 'align.check'

    def __init__(self, **kwd):
        """Initialize the base tabular datatype, then declare the eight column names."""
        Tabular.__init__( self, **kwd )
        names = ['name','pound','dash','plus','equal','loop','tilde','total']
        self.column_names = names
        # One column per declared name (8).
        self.columns = len(names)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
505 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Names(Tabular):
    """Tabular datatype for 'names' files."""
    file_ext = 'names'

    def __init__(self, **kwd):
        """
        A name file relates a representative sequence (col 1) to the
        comma-separated sequences it represents (col 2).
        """
        Tabular.__init__( self, **kwd )
        names = ['name','representatives']
        self.column_names = names
        # One column per declared name (2).
        self.columns = len(names)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
513 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Summary(Tabular):
    """Tabular datatype for 'summary' files."""
    file_ext = 'summary'

    def __init__(self, **kwd):
        """
        A summary file describes the quality of the sequences in an unaligned
        or aligned fasta-formatted sequence file.
        """
        Tabular.__init__( self, **kwd )
        names = ['seqname','start','end','nbases','ambigs','polymer']
        self.column_names = names
        # One column per declared name (6).
        self.columns = len(names)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
521 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Group(Tabular):
    """Tabular datatype for 'groups' files."""
    file_ext = 'groups'

    def __init__(self, **kwd):
        """A group file pairs a name (col 1) with the group it belongs to (col 2)."""
        Tabular.__init__( self, **kwd )
        names = ['name','group']
        self.column_names = names
        # One column per declared name (2).
        self.columns = len(names)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
529 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Design(Tabular):
    """Tabular datatype for 'design' files."""
    file_ext = 'design'

    def __init__(self, **kwd):
        """
        A design file relates a group (col 1) to a grouping (col 2),
        providing a way to merge groups.
        """
        Tabular.__init__( self, **kwd )
        names = ['group','grouping']
        self.column_names = names
        # One column per declared name (2).
        self.columns = len(names)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
537 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class AccNos(Tabular):
    """Tabular datatype for 'accnos' files."""
    file_ext = 'accnos'

    def __init__(self, **kwd):
        """An accnos file is a single-column list of names."""
        Tabular.__init__( self, **kwd )
        names = ['name']
        self.column_names = names
        # One column per declared name (1).
        self.columns = len(names)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
545 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Oligos( data.Text ):
    """Text datatype for 'oligos' files (forward / reverse / barcode lines)."""
    file_ext = 'oligos'

    def sniff( self, filename ):
        """
        Determines whether the file is an oligos format.

        Lines starting with '#' are skipped.  Every other line must be either
          - two tab-separated fields whose first field starts with
            'forward' or 'reverse', or
          - three tab-separated fields whose first field starts with 'barcode'.
        Returns True as soon as more than 20 such lines have been seen, or at
        the end of the data if at least one was seen; returns False otherwise,
        including when the file cannot be opened or read.
        """
        count = 0
        try:
            # 'with' guarantees the handle is closed on every exit path and,
            # unlike a 'finally: fh.close()', cannot fail on an unbound name
            # when open() itself raises.
            with open( filename ) as fh:
                while True:
                    line = fh.readline().strip()
                    if not line:
                        # Empty after stripping: end of file, or a blank line,
                        # which has always ended the scan as well.
                        break
                    if line[0] == '#':
                        continue
                    linePieces = line.split('\t')
                    if len(linePieces) == 2 and re.match('forward|reverse',linePieces[0]):
                        count += 1
                    elif len(linePieces) == 3 and re.match('barcode',linePieces[0]):
                        count += 1
                    else:
                        return False
                    # Test the cap right after counting.  It used to sit behind
                    # a 'continue', so it was never reached for data lines and
                    # the whole file was always read.
                    if count > 20:
                        return True
        except Exception:
            # Sniffing is best effort: a file that cannot be read is simply
            # reported as "not this format".
            return False
        return count > 0
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
581 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Frequency(Tabular):
    """Tabular datatype for 'freq' files: an integer position and a float frequency per row."""
    file_ext = 'freq'

    def __init__(self, **kwd):
        """Initialize the base tabular datatype, then declare the two columns and their types."""
        Tabular.__init__( self, **kwd )
        self.column_names = ['position','frequency']
        self.column_types = ['int','float']

    def sniff( self, filename ):
        """
        Determines whether the file is a frequency tabular format for chimera analysis
        (fields are tab-separated):
        #1.14.0
        0       0.000
        1       0.000
        ...
        155     0.975

        Lines starting with '#' are skipped.  On every other line the first
        field must parse as an int and the second as a float.  Returns True as
        soon as more than 20 such lines have been seen, or at the end of the
        data if at least one was seen; returns False otherwise, including when
        the file cannot be opened or read.
        """
        count = 0
        try:
            # 'with' guarantees the handle is closed on every exit path and,
            # unlike a 'finally: fh.close()', cannot fail on an unbound name
            # when open() itself raises.
            with open( filename ) as fh:
                while True:
                    line = fh.readline().strip()
                    if not line:
                        # Empty after stripping: end of file, or a blank line,
                        # which has always ended the scan as well.
                        break
                    if line[0] == '#':
                        continue
                    linePieces = line.split('\t')
                    try:
                        int(linePieces[0])
                        float(linePieces[1])
                    except (ValueError, IndexError):
                        # Non-numeric field, or fewer than two fields.
                        return False
                    count += 1
                    # Test the cap right after counting.  It used to sit behind
                    # a 'continue', so it was never reached for data lines and
                    # the whole file was always read.
                    if count > 20:
                        return True
        except Exception:
            # Sniffing is best effort: a file that cannot be read is simply
            # reported as "not this format".
            return False
        return count > 0
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
626 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Quantile(Tabular):
    """Tabular datatype for 'quan' files: an integer followed by six float quantile columns."""
    file_ext = 'quan'
    # The two descriptions were attached to the wrong element names (the
    # "filtered" flag described a mask and vice versa); each now matches its name.
    MetadataElement( name="filtered", default=False, no_value=False, optional=True , desc="Quantiles calculated using a frequency filter", readonly=True)
    MetadataElement( name="masked", default=False, no_value=False, optional=True , desc="Quantiles calculated using a mask", readonly=True)

    def __init__(self, **kwd):
        """Quantiles for chimera analysis"""
        Tabular.__init__( self, **kwd )
        self.column_names = ['num','ten','twentyfive','fifty','seventyfive','ninetyfive','ninetynine']
        self.column_types = ['int','float','float','float','float','float','float']

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """
        Log the extra keyword arguments; no metadata is written to the dataset.

        NOTE(review): the base class set_meta is not called here - confirm
        that skipping it is intentional.
        """
        log.info( "Mothur Quantile set_meta %s" % kwd)

    def sniff( self, filename ):
        """
        Determines whether the file is a quantiles tabular format for chimera analysis
        (fields are tab-separated):
        1       0       0       0       0       0       0
        2       0.309198        0.309198        0.37161 0.37161 0.37161 0.37161
        3       0.510982        0.563213        0.693529        0.858939        1.07442 1.20608
        ...

        Lines starting with '#' are skipped.  Every other line must have at
        least seven fields: the first must parse as an int and the next six as
        floats.  Returns True as soon as more than 10 such lines have been
        seen, or at the end of the data if at least one was seen; returns False
        otherwise, including when the file cannot be opened or read.
        """
        count = 0
        try:
            # 'with' guarantees the handle is closed on every exit path and,
            # unlike a 'finally: fh.close()', cannot fail on an unbound name
            # when open() itself raises.
            with open( filename ) as fh:
                while True:
                    line = fh.readline().strip()
                    if not line:
                        # Empty after stripping: end of file, or a blank line,
                        # which has always ended the scan as well.
                        break
                    if line[0] == '#':
                        continue
                    linePieces = line.split('\t')
                    # Slicing never raises on short rows, so check the width
                    # explicitly (indexing field 6 used to do this implicitly).
                    if len(linePieces) < 7:
                        return False
                    try:
                        int(linePieces[0])
                        for value in linePieces[1:7]:
                            float(value)
                    except ValueError:
                        return False
                    count += 1
                    # Test the cap right after counting.  It used to sit behind
                    # a 'continue', so it was never reached for data lines and
                    # the whole file was always read.
                    if count > 10:
                        return True
        except Exception:
            # Sniffing is best effort: a file that cannot be read is simply
            # reported as "not this format".
            return False
        return count > 0
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
678 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
# Quantile datatype variant registered under the 'filtered.quan' extension.
class FilteredQuantile(Quantile):
    file_ext = 'filtered.quan'

    def __init__(self, **kwd):
        """Initialize as a Quantile (chimera analysis) and set the filtered flag."""
        Quantile.__init__(self, **kwd)
        self.filtered = True
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
685 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
# Quantile datatype variant registered under the 'masked.quan' extension.
class MaskedQuantile(Quantile):
    file_ext = 'masked.quan'

    def __init__(self, **kwd):
        """Initialize as a Quantile (chimera analysis): masked, not filtered."""
        Quantile.__init__(self, **kwd)
        # Assigned left to right: masked first, then filtered.
        self.masked, self.filtered = True, False
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
693 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
# Quantile datatype variant registered under the 'filtered.masked.quan' extension.
class FilteredMaskedQuantile(Quantile):
    file_ext = 'filtered.masked.quan'

    def __init__(self, **kwd):
        """Initialize as a Quantile (chimera analysis): both masked and filtered."""
        Quantile.__init__(self, **kwd)
        # Assigned left to right: masked first, then filtered.
        self.masked, self.filtered = True, True
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
701 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class LaneMask(data.Text):
    file_ext = 'filter'

    def sniff( self, filename ):
        """
        Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones.

        The file is read in 1000-character chunks so that a large file that
        is not a mask is rejected without being read completely.

        Returns True only when the file holds at least one '0'/'1' character
        and nothing else apart from trailing line terminators.  Returns False
        for empty, unreadable or non-matching files.
        """
        try:
            fh = open( filename )
        except (IOError, OSError):
            # Nothing was opened, so there is nothing to close.
            return False
        try:
            saw_digit = False
            line_ended = False
            while True:
                buff = fh.read(1000)
                if not buff:
                    break #EOF
                if line_ended:
                    # Only further line terminators may follow the mask line.
                    if buff.strip('\r\n'):
                        return False
                    continue
                body = buff.rstrip('\r\n')
                if len(body) != len(buff):
                    line_ended = True
                if body:
                    # An embedded newline fails the match, which enforces
                    # the single-line requirement.
                    if not re.match('^[01]+$', body):
                        return False
                    saw_digit = True
            return saw_digit
        except Exception:
            # Read or decode problems mean this is not a lane mask.
            return False
        finally:
            fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
724 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class SequenceTaxonomy(Tabular):
    """
    A table with 2 columns:
    - SequenceName
    - Taxonomy (semicolon-separated taxonomy in descending order)
    Example:
    X56533.1        Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma;
    X97975.1        Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida;
    AF052717.1      Eukaryota;Parabasalidea;
    """
    file_ext = 'seq.taxonomy'

    def __init__(self, **kwd):
        """Initialize the tabular datatype and name its two columns."""
        Tabular.__init__( self, **kwd )
        self.column_names = ['name','taxonomy']

    def sniff( self, filename ):
        """
        Determines whether the file is a SequenceTaxonomy

        Examines at most the first 11 non-blank lines.  Each must have
        exactly two tab-separated fields, and the second must be one or more
        taxon names, each ending in ';' and optionally followed by a
        parenthesised integer such as '(100)' before the ';'.

        Returns True if at least one line was examined and all examined
        lines conform; False otherwise, including when the file cannot be
        opened or read.
        """
        # Raw string: same pattern as before without relying on Python
        # leaving the unknown escape '\d' untouched.
        pat = r'^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$'
        try:
            fh = open( filename )
        except (IOError, OSError):
            # Nothing was opened, so there is nothing to close.
            return False
        try:
            count = 0
            while True:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if not line:
                    continue # blank lines are skipped, not counted
                fields = line.split('\t')
                if len(fields) != 2:
                    return False
                if not re.match(pat,fields[1]):
                    return False
                count += 1
                if count > 10:
                    break
            return count > 0
        except Exception:
            # Read or decode problems mean this is not a taxonomy file.
            return False
        finally:
            fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
769 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class RDPSequenceTaxonomy(SequenceTaxonomy):
    """
    A table with 2 columns:
    - SequenceName
    - Taxonomy (semicolon-separated taxonomy in descending order, RDP requires exactly 6 levels deep)
    Example:
    AB001518.1      Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales;
    AB001724.1      Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa;
    AB001774.1      Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila;

    NOTE(review): the first two example rows show only five levels, which
    sniff() below would reject -- confirm which is intended.
    """
    file_ext = 'rdp.taxonomy'

    def sniff( self, filename ):
        """
        Determines whether the file is an RDPSequenceTaxonomy

        Examines at most the first 11 non-blank lines.  Each must have
        exactly two tab-separated fields, and the second must be exactly six
        taxon names, each ending in ';' and optionally followed by a
        parenthesised integer such as '(100)' before the ';'.

        Returns True if at least one line was examined and all examined
        lines conform; False otherwise, including when the file cannot be
        opened or read.
        """
        # Raw string: same pattern as before without relying on Python
        # leaving the unknown escape '\d' untouched.
        pat = r'^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$'
        try:
            fh = open( filename )
        except (IOError, OSError):
            # Nothing was opened, so there is nothing to close.
            return False
        try:
            count = 0
            while True:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if not line:
                    continue # blank lines are skipped, not counted
                fields = line.split('\t')
                if len(fields) != 2:
                    return False
                if not re.match(pat,fields[1]):
                    return False
                count += 1
                if count > 10:
                    break
            return count > 0
        except Exception:
            # Read or decode problems mean this is not a taxonomy file.
            return False
        finally:
            fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
810 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
# Tabular datatype registered under the 'cons.taxonomy' extension.
class ConsensusTaxonomy(Tabular):
    file_ext = 'cons.taxonomy'

    def __init__(self, **kwd):
        """Initialize the tabular datatype with OTU, count and taxonomy columns."""
        Tabular.__init__(self, **kwd)
        names = ['OTU', 'count', 'taxonomy']
        self.column_names = names
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
817 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
# Tabular datatype registered under the 'tax.summary' extension.
class TaxonomySummary(Tabular):
    file_ext = 'tax.summary'

    def __init__(self, **kwd):
        """Initialize the summary-of-taxon-classification table and name its columns."""
        Tabular.__init__(self, **kwd)
        names = ['taxlevel', 'rankID', 'taxon', 'daughterlevels', 'total']
        self.column_names = names
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
824 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class Phylip(data.Text):
    file_ext = 'phy'

    def sniff( self, filename ):
        """
        Determines whether the file is in Phylip format (Interleaved or Sequential)
        The first line of the input file contains the number of species and the
        number of characters, in free format, separated by blanks (not by
        commas). The information for each species follows, starting with a
        ten-character species name (which can include punctuation marks and blanks),
        and continuing with the characters for that species.
        http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles
        Interleaved Example:
            6   39
        Archaeopt CGATGCTTAC CGCCGATGCT
        HesperorniCGTTACTCGT TGTCGTTACT
        BaluchitheTAATGTTAAT TGTTAATGTT
        B. virginiTAATGTTCGT TGTTAATGTT
        BrontosaurCAAAACCCAT CATCAAAACC
        B.subtilisGGCAGCCAAT CACGGCAGCC

                  TACCGCCGAT GCTTACCGC
                  CGTTGTCGTT ACTCGTTGT
                  AATTGTTAAT GTTAATTGT
                  CGTTGTTAAT GTTCGTTGT
                  CATCATCAAA ACCCATCAT
                  AATCACGGCA GCCAATCAC

        Only the counts line is validated: the first line must begin with two
        whitespace-separated integers.  Returns True in that case and False
        otherwise, including when the file cannot be opened or read.
        """
        try:
            fh = open( filename )
        except (IOError, OSError):
            # Nothing was opened, so there is nothing to close.
            return False
        try:
            # counts line
            linePieces = fh.readline().strip().split()
            # int() raises ValueError on non-integers and the indexing raises
            # IndexError on a short line; both mean "not Phylip".
            int(linePieces[0])  # number of species
            int(linePieces[1])  # number of characters per species
            # data lines
            # TODO check data lines:
            #   while True:
            #       line = fh.readline()
            #       # name is the first 10 characters
            #       name = line[0:10]
            #       seq = line[10:].strip()
            #       # nucleic base or amino acid 1-char designators (spaces allowed)
            #       bases = ''.join(seq.split())
            #       # float per base (each separated by space)
            return True
        except Exception:
            # Parse, read or decode problems mean this is not a Phylip file.
            return False
        finally:
            fh.close()
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
878 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
879 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
880 class Axes(Tabular): |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
881 file_ext = 'axes' |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
882 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
883 def __init__(self, **kwd): |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
884 """Initialize axes datatype""" |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
885 Tabular.__init__( self, **kwd ) |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
886 def sniff( self, filename ): |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
887 """ |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
888 Determines whether the file is an axes format |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
889 The first line may have column headings. |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
890 The following lines have the name in the first column plus float columns for each axis. |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
891 ==> 98_sq_phylip_amazon.fn.unique.pca.axes <== |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
892 group axis1 axis2 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
893 forest 0.000000 0.145743 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
894 pasture 0.145743 0.000000 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
895 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
896 ==> 98_sq_phylip_amazon.nmds.axes <== |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
897 axis1 axis2 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
898 U68589 0.262608 -0.077498 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
899 U68590 0.027118 0.195197 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
900 U68591 0.329854 0.014395 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
901 """ |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
902 try: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
903 fh = open( filename ) |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
904 count = 0 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
905 line = fh.readline() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
906 line = line.strip() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
907 col_cnt = None |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
908 while True: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
909 line = fh.readline() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
910 line = line.strip() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
911 if not line: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
912 break #EOF |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
913 if line: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
914 fields = line.split('\t') |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
915 if col_cnt == None: # ignore values in first line as they may be column headings |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
916 col_cnt = len(fields) |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
917 else: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
918 if len(fields) != col_cnt : |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
919 return False |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
920 try: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
921 for i in range(1, col_cnt): |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
922 check = float(fields[i]) |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
923 except ValueError: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
924 return False |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
925 count += 1 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
926 if count > 10: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
927 return True |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
928 if count > 0: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
929 return True |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
930 except: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
931 pass |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
932 finally: |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
933 fh.close() |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
934 return False |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
935 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
936 ## Qiime Classes |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
937 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class QiimeMetadataMapping(Tabular):
    """
    QIIME metadata mapping file: information about the samples necessary
    to perform the data analysis.
    http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview

    The header line starts with '#SampleID'; its tab-separated fields are
    stored in the 'column_names' metadata element.
    """
    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
    file_ext = 'qiimemapping'

    def __init__(self, **kwd):
        """
        Initialize the datatype; all keyword arguments are handed to Tabular.
        # self.column_names = ['#SampleID','BarcodeSequence','LinkerPrimerSequence','Description']
        """
        Tabular.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is a qiime mapping file.
        Just checking for an appropriate header line for now, could be improved.

        Returns True as soon as a line matches the header pattern
        ('#SampleID', optional tab-separated identifier columns, then
        'Description'); returns False if no line matches or the file
        cannot be read.
        """
        header_pat = re.compile(r'#SampleID(\t[a-zA-Z][a-zA-Z0-9_]*)*\tDescription')
        try:
            fh = open( filename )
        except (IOError, OSError):
            # Sniffing is best-effort: an unreadable file is simply "not this type".
            return False
        try:
            # Iterating the handle stops at EOF (a readline() loop would spin
            # forever on the empty string returned after the last line).
            for line in fh:
                if header_pat.match(line):
                    return True
        except (IOError, OSError, UnicodeDecodeError):
            pass
        finally:
            fh.close()
        return False

    def set_column_names(self, dataset):
        """
        Store the header fields of the dataset in metadata.column_names.

        Only the first line is examined; the metadata is left untouched when
        the dataset has no data or the first line does not start with
        '#SampleID'.
        """
        if not dataset.has_data():
            return
        dataset_fh = open( dataset.file_name )
        try:
            line = dataset_fh.readline()
        finally:
            # Close the handle even if the read fails.
            dataset_fh.close()
        if line.startswith('#SampleID'):
            dataset.metadata.column_names = line.strip().split('\t')

    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ):
        """
        Set the standard Tabular metadata, then record the column names
        taken from the header line.
        """
        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
        self.set_column_names(dataset)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
979 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class QiimeOTU(Tabular):
    """
    Associates OTUs with sequence IDs.

    Each line holds an OTU identifier followed by the IDs of the sequences
    assigned to that OTU (presumably tab-separated, as the class derives
    from Tabular -- confirm against real data).

    Example:
    0   FLP3FBN01C2MYD  FLP3FBN01B2ALM
    1   FLP3FBN01DF6NE  FLP3FBN01CKW1J  FLP3FBN01CHVM4
    2   FLP3FBN01AXQ2Z
    """
    file_ext = 'qiimeotu'
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
989 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class QiimeOTUTable(Tabular):
    """
    QIIME OTU table: per-sample counts for each OTU, optionally followed by
    a consensus lineage column.

    Example:
    #Full OTU Counts
    #OTU ID PC.354  PC.355  PC.356  Consensus Lineage
    0   0   1   0   Root;Bacteria;Firmicutes;"Clostridia";Clostridiales
    1   1   3   1   Root;Bacteria
    2   0   2   2   Root;Bacteria;Bacteroidetes
    """
    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
    file_ext = 'qiimeotutable'

    def init_meta( self, dataset, copy_from=None ):
        """Delegate metadata initialization to Tabular."""
        tabular.Tabular.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """Set metadata by recording the column names from the header line."""
        self.set_column_names(dataset)

    def set_column_names(self, dataset):
        """
        Store the fields of the '#OTU ID' header line in metadata.column_names
        and mark the two leading lines as comment lines.

        The header is expected on the second line of the file; column_names is
        left untouched when that line does not start with '#OTU ID'.  Nothing
        is changed when the dataset has no data.
        """
        if not dataset.has_data():
            return
        dataset_fh = open( dataset.file_name )
        try:
            dataset_fh.readline()  # skip the first (title) line
            line = dataset_fh.readline()
        finally:
            # Close the handle even if a read fails.
            dataset_fh.close()
        if line.startswith('#OTU ID'):
            dataset.metadata.column_names = line.strip().split('\t')
        # NOTE(review): kept inside the has_data() branch, as the statement
        # directly followed the close() call -- confirm against the repository.
        dataset.metadata.comment_lines = 2
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1013 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class QiimeDistanceMatrix(Tabular):
    """
    QIIME distance matrix: the first line lists the sample names, each
    following line has a sample name and its distance to every sample.

    Example:
            PC.354  PC.355  PC.356
    PC.354  0.0     3.177   1.955
    PC.355  3.177   0.0     3.444
    PC.356  1.955   3.444   0.0
    """
    file_ext = 'qiimedistmat'

    def init_meta( self, dataset, copy_from=None ):
        """Delegate metadata initialization to Tabular."""
        tabular.Tabular.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """Set metadata by recording the column names from the first line."""
        self.set_column_names(dataset)

    def set_column_names(self, dataset):
        """
        Store the tab-separated names from the first line of the dataset in
        metadata.column_names and mark that line as a comment line.

        Nothing is changed when the dataset has no data.
        """
        if not dataset.has_data():
            return
        dataset_fh = open( dataset.file_name )
        try:
            # first line contains the names
            line = dataset_fh.readline()
        finally:
            # Close the handle even if the read fails.
            dataset_fh.close()
        dataset.metadata.column_names = line.strip().split('\t')
        # NOTE(review): kept inside the has_data() branch, as the statement
        # directly followed the close() call -- confirm against the repository.
        dataset.metadata.comment_lines = 1
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1034 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class QiimePCA(Tabular):
    """
    Principal Coordinate Analysis Data

    The principal coordinate (PC) axes (columns) for each sample (rows).
    Pairs of PCs can then be graphed to view the relationships between samples.
    The bottom of the output file contains the eigenvalues and % variation
    explained for each PC.

    Example:
    pc vector number    1   2   3
    PC.354  -0.309063936588 0.0398252112257 0.0744672231759
    PC.355  -0.106593922619 0.141125998277  0.0780204374172
    PC.356  -0.219869362955 0.00917241121781    0.0357281314115


    eigvals 0.480220500471  0.163567082874  0.125594470811
    % variation explained   51.6955484555   17.6079322939
    """
    file_ext = 'qiimepca'
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1052 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class QiimeParams(Tabular):
    """
    QIIME parameters file.

    Judging by the example below, each setting line is a
    'script_name:parameter_name' key followed by its value, and lines
    starting with '#' are comments.

    Example:
    ###pick_otus_through_otu_table.py parameters###

    # OTU picker parameters
    pick_otus:otu_picking_method    uclust
    pick_otus:clustering_algorithm  furthest

    # Representative set picker parameters
    pick_rep_set:rep_set_picking_method first
    pick_rep_set:sort_by    otu
    """
    file_ext = 'qiimeparams'
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1066 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class QiimePrefs(data.Text):
    """
    A text file, containing coloring preferences to be used by
    make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py.

    Example:
    {
    'background_color':'black',

    'sample_coloring':
        {
            'Treatment':
            {
                'column':'Treatment',
                'colors':(('red',(0,100,100)),('blue',(240,100,100)))
            },
            'DOB':
            {
                'column':'DOB',
                'colors':(('red',(0,100,100)),('blue',(240,100,100)))
            }
        },
    'MONTE_CARLO_GROUP_DISTANCES':
        {
            'Treatment': 10,
            'DOB': 10
        }
    }
    """
    file_ext = 'qiimeprefs'
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1095 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
class QiimeTaxaSummary(Tabular):
    """
    QIIME taxa summary: one row per taxon with a value for each sample.

    Example:
    Taxon   PC.354  PC.355  PC.356
    Root;Bacteria;Actinobacteria    0.0     0.177   0.955
    Root;Bacteria;Firmicutes        0.177   0.0     0.444
    Root;Bacteria;Proteobacteria    0.955   0.444   0.0
    """
    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
    file_ext = 'qiimetaxsummary'

    def set_column_names(self, dataset):
        """
        Store the header fields of the dataset in metadata.column_names.

        Only the first line is examined; the metadata is left untouched when
        the dataset has no data or the first line does not start with 'Taxon'.
        """
        if not dataset.has_data():
            return
        dataset_fh = open( dataset.file_name )
        try:
            line = dataset_fh.readline()
        finally:
            # Close the handle even if the read fails.
            dataset_fh.close()
        if line.startswith('Taxon'):
            dataset.metadata.column_names = line.strip().split('\t')

    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ):
        """
        Set the standard Tabular metadata, then record the column names
        taken from the header line.
        """
        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
        self.set_column_names(dataset)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1117 |
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
if __name__ == '__main__':
    # When executed as a script, run the doctests found in this module.
    import doctest
    import sys
    this_module = sys.modules[__name__]
    doctest.testmod(this_module)
e5c3175506b7
Initial tool configs for qiime, most need work.
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1121 |