annotate proteomics.py @ 4:09b89b345de2

Update
author Ira Cooke <iracooke@gmail.com>
date Sun, 09 Jun 2013 08:16:08 -0500
parents c10a62c886b8
children df8b867ab71a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
1 """
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
2 Proteomics format classes
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
3 """
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
4 import logging
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
5 import re
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
6 from galaxy.datatypes.data import *
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
7 from galaxy.datatypes.xml import *
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
8 from galaxy.datatypes.sniff import *
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
9 from galaxy.datatypes.binary import *
4
Ira Cooke <iracooke@gmail.com>
parents: 0
diff changeset
10 from galaxy.datatypes.interval import *
0
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
11
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
12 log = logging.getLogger(__name__)
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
13
4
Ira Cooke <iracooke@gmail.com>
parents: 0
diff changeset
class ProtGff( Gff ):
    """Tab delimited data in Gff format"""
    file_ext = "prot_gff"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        A purged dataset gets fixed placeholder strings; otherwise the peek
        is read from the dataset file and the blurb is a fixed label.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'Proteogenomics GFF'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Return True if one of the first three lines is a ``##gff-version`` pragma.

        Each line is stripped of surrounding whitespace before the prefix
        comparison. The file is opened with a context manager so the handle
        is released even if reading raises.
        """
        with open( filename ) as handle:
            for _ in range( 3 ):
                # readline() yields '' at EOF, which simply fails the prefix test.
                if handle.readline().strip().startswith( "##gff-version" ):
                    return True
        return False
Ira Cooke <iracooke@gmail.com>
parents: 0
diff changeset
37
0
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
38
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class Xls( Binary ):
    """Class describing a binary excel spreadsheet file"""
    file_ext = "xls"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set a fixed peek label and use the human-readable size as the blurb.

        ``is_multi_byte`` is accepted for signature compatibility and is not
        used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Excel Spreadsheet file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return the stored peek, or a size-based description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback. Catching Exception (rather than a bare
            # ``except``) lets KeyboardInterrupt and SystemExit propagate.
            return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
55
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class ProteomicsXml(GenericXml):
    """ An enhanced XML datatype used to reuse code across several
    proteomic/mass-spec datatypes.

    The methods below read ``self.root`` (a regular-expression fragment that
    matches the root element name) and ``self.blurb``; the subclasses in this
    module supply both as class attributes.
    """

    def sniff(self, filename):
        """ Determines whether the file is the correct XML type.

        Leading lines that start with ``<?`` (XML declaration / processing
        instructions) are skipped. The first other line must begin with
        ``<root`` or ``<ns:root``. Returns False if the file ends before such
        a line is found.
        """
        # Raw string so that \w reaches the regex engine untouched.
        # Matches <root or <ns:root for any ns string.
        pattern = r'^<(\w*:)?%s' % self.root
        with open(filename, 'r') as contents:
            for line in contents:
                if not line.startswith('<?'):
                    return re.match(pattern, line) is not None
        return False

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = self.blurb
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
78
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class PepXml(ProteomicsXml):
    """pepXML data, recognised by an ``msms_pipeline_analysis`` root element."""
    root = "msms_pipeline_analysis"
    blurb = 'pepXML data'
    file_ext = "pepxml"
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
84
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
85
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MzML(ProteomicsXml):
    """mzML data; the root element may be ``mzML`` or ``indexedmzML``."""
    root = "(mzML|indexedmzML)"
    blurb = 'mzML Mass Spectrometry data'
    file_ext = "mzml"
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
91
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
92
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class ProtXML(ProteomicsXml):
    """protXML data, recognised by a ``protein_summary`` root element."""
    root = "protein_summary"
    blurb = 'prot XML Search Results'
    file_ext = "protxml"
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
98
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
99
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MzXML(ProteomicsXml):
    """mzXML data, recognised by an ``mzXML`` root element."""
    root = "mzXML"
    blurb = "mzXML Mass Spectrometry data"
    file_ext = "mzXML"
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
105
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
106 ## PSI datatypes
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MzIdentML(ProteomicsXml):
    """mzIdentML data (extension ``mzid``), recognised by a ``MzIdentML`` root element."""
    root = "MzIdentML"
    blurb = "XML identified peptides and proteins."
    file_ext = "mzid"
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
111
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
112
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class TraML(ProteomicsXml):
    """TraML transition list, recognised by a ``TraML`` root element."""
    root = "TraML"
    blurb = "TraML transition list"
    file_ext = "traML"
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
117
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
118
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MzQuantML(ProteomicsXml):
    """mzQuantML data (extension ``mzq``), recognised by a ``MzQuantML`` root element."""
    root = "MzQuantML"
    blurb = "XML quantification data"
    file_ext = "mzq"
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
123
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
124
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class Mgf( Text ):
    """Mascot Generic Format data"""
    file_ext = "mgf"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mgf Mascot Generic Format'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Return True if a ``BEGIN IONS`` line appears near the top of the file.

        Only the leading lines are examined (bounded by ``max_lines``). The
        line must equal ``BEGIN IONS`` exactly once trailing CR/LF characters
        are removed. Always returns a bool, including for files that end
        before the line limit is reached.
        """
        mgf_begin_ions = "BEGIN IONS"
        max_lines = 100

        # open() + with replaces the Python 2-only file() builtin and
        # guarantees the handle is closed on every exit path.
        with open( filename ) as handle:
            for i, line in enumerate( handle ):
                if line.rstrip( '\n\r' ) == mgf_begin_ions:
                    return True
                if i > max_lines:
                    break
        return False
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
149
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
150
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MascotDat( Text ):
    """Mascot search results """
    file_ext = "mascotdat"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mascotdat Mascot Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Return True if the Mascot MIME-Version header appears near the top of the file.

        Only the leading lines are examined (bounded by ``max_lines``). The
        line must equal the expected header exactly once trailing CR/LF
        characters are removed. Always returns a bool, including for files
        that end before the line limit is reached.
        """
        mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
        max_lines = 10

        # open() + with replaces the Python 2-only file() builtin and
        # guarantees the handle is closed on every exit path.
        with open( filename ) as handle:
            for i, line in enumerate( handle ):
                if line.rstrip( '\n\r' ) == mime_version:
                    return True
                if i > max_lines:
                    break
        return False
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
175
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
176
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class RAW( Binary ):
    """Class describing a Thermo Finnigan binary RAW file"""
    file_ext = "raw"

    def sniff( self, filename ):
        """Return True if the first 20 bytes contain the ``Finnigan`` signature.

        Any error while opening or reading the file is treated as
        "not a RAW file" and yields False.
        """
        # Thermo Finnigan RAW format is proprietary and hence not well documented.
        # Files start with 2 bytes that seem to differ followed by F\0i\0n\0n\0i\0g\0a\0n
        # This combination represents 17 bytes, but to play safe we read 20 bytes from
        # the start of the file.
        finnigan = b'F\0i\0n\0n\0i\0g\0a\0n'
        try:
            # Binary mode: the header is raw bytes, not text.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 20 )
        except Exception:
            return False
        # Searching the bytes directly (instead of their hex dump) cannot
        # produce a match that starts in the middle of a byte.
        return finnigan in header

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set a fixed peek label and use the human-readable size as the blurb.

        ``is_multi_byte`` is accepted for signature compatibility and is not
        used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Thermo Finnigan RAW file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return the stored peek, or a size-based description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback. Catching Exception (rather than a bare
            # ``except``) lets KeyboardInterrupt and SystemExit propagate.
            return "Thermo Finnigan RAW file (%s)" % ( data.nice_size( dataset.get_size() ) )
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
206
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
207
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
# Register the RAW sniffer only when Binary exposes the registration hook.
if getattr(Binary, 'register_sniffable_binary_format', None) is not None:
    Binary.register_sniffable_binary_format('RAW', 'RAW', RAW)
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
210
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
211
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class Msp(Text):
    """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """
    file_ext = "msp"

    @staticmethod
    def next_line_starts_with(contents, prefix):
        """Consume one line from ``contents`` and report whether it begins with ``prefix``."""
        candidate = contents.readline()
        if candidate is None:
            return False
        return candidate.startswith(prefix)

    def sniff(self, filename):
        """ Determines whether the file is a NIST MSP output file.

        The first line must start with ``Name:`` and the second with ``MW:``.

        >>> fname = get_test_fname('test.msp')
        >>> Msp().sniff(fname)
        True
        >>> fname = get_test_fname('test.mzXML')
        >>> Msp().sniff(fname)
        False
        """
        with open(filename, 'r') as contents:
            # The second line is only read when the first one matched.
            if not Msp.next_line_starts_with(contents, "Name:"):
                return False
            return Msp.next_line_starts_with(contents, "MW:")
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
233
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class Ms2(Text):
    """ms2 data, recognised by its leading block of ``H<TAB>`` header lines."""
    file_ext = "ms2"

    def sniff(self, filename):
        """ Determines whether the file is a valid ms2 file.

        Collects the leading run of lines that start with ``H<TAB>`` and
        requires that, for each of CreationDate, Extractor, ExtractorVersion
        and ExtractorOptions, at least one of them starts with
        ``H<TAB><field>``. The comparison is a plain prefix match.

        Reading stops at the first non-header line or at end of file, so an
        empty file or a file made only of header lines terminates normally.

        >>> fname = get_test_fname('test.msp')
        >>> Ms2().sniff(fname)
        False
        >>> fname = get_test_fname('test.ms2')
        >>> Ms2().sniff(fname)
        True
        """
        header_lines = []
        with open(filename, 'r') as contents:
            # Iterating the file ends at EOF; the previous readline() loop
            # spun forever there because '' never reached the break branch.
            for line in contents:
                if not line.startswith('H\t'):
                    break
                header_lines.append(line)

        required_fields = ['CreationDate', 'Extractor', 'ExtractorVersion', 'ExtractorOptions']
        for header_field in required_fields:
            wanted_prefix = 'H\t%s' % (header_field)
            if not any(header_line.startswith(wanted_prefix) for header_line in header_lines):
                return False

        return True
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
268
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
269 # unsniffable binary format, should do something about this
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class XHunterAslFormat(Binary):
    """Annotated spectra in the HLF format.

    Format description: http://www.thegpm.org/HUNTER/format_2006_09_15.html
    """
    file_ext = "hlf"
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
273
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
274
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
# Declare 'hlf' as an unsniffable binary extension when Binary exposes the hook.
if getattr(Binary, 'register_unsniffable_binary_ext', None) is not None:
    Binary.register_unsniffable_binary_ext('hlf')