comparison blast.py @ 4:f9a7783ed7b6 draft

Uploaded v0.0.14 adding BLAST database support. See also the matching update for the NCBI BLAST+ wrappers which use these new definitions. This update included work by Edward Kirton.
author peterjc
date Fri, 09 Nov 2012 06:50:05 -0500
parents 6ef523b390e0
children b3a3ba0c1d47
comparison
equal deleted inserted replaced
3:6ef523b390e0 4:f9a7783ed7b6
1 """ 1 """
2 BlastXml class 2 BlastXml class
3 """ 3 """
4 4
5 from galaxy.datatypes.data import get_file_peek 5 from galaxy.datatypes.data import get_file_peek
6 from galaxy.datatypes.data import Text 6 from galaxy.datatypes.data import Text, Data
7 from galaxy.datatypes.xml import GenericXml 7 from galaxy.datatypes.xml import GenericXml
8 from galaxy.datatypes.metadata import MetadataElement
8 9
9 class BlastXml( GenericXml ): 10 class BlastXml( GenericXml ):
10 """NCBI Blast XML Output data""" 11 """NCBI Blast XML Output data"""
11 file_ext = "blastxml" 12 file_ext = "blastxml"
12 13
16 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) 17 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
17 dataset.blurb = 'NCBI Blast XML data' 18 dataset.blurb = 'NCBI Blast XML data'
18 else: 19 else:
19 dataset.peek = 'file does not exist' 20 dataset.peek = 'file does not exist'
20 dataset.blurb = 'file purged from disk' 21 dataset.blurb = 'file purged from disk'
22
21 def sniff( self, filename ): 23 def sniff( self, filename ):
22 """ 24 """
23 Determines whether the file is blastxml 25 Determines whether the file is blastxml
24 26
25 >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' ) 27 >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
53 def merge(split_files, output_file): 55 def merge(split_files, output_file):
54 """Merging multiple XML files is non-trivial and must be done in subclasses.""" 56 """Merging multiple XML files is non-trivial and must be done in subclasses."""
55 if len(split_files) == 1: 57 if len(split_files) == 1:
56 #For one file only, use base class method (move/copy) 58 #For one file only, use base class method (move/copy)
57 return Text.merge(split_files, output_file) 59 return Text.merge(split_files, output_file)
60 if not split_files:
61 raise ValueError("Given no BLAST XML files, %r, to merge into %s" \
62 % (split_files, output_file))
58 out = open(output_file, "w") 63 out = open(output_file, "w")
59 h = None 64 h = None
60 for f in split_files: 65 for f in split_files:
61 h = open(f) 66 h = open(f)
62 body = False 67 body = False
120 out.write(" </BlastOutput_iterations>\n") 125 out.write(" </BlastOutput_iterations>\n")
121 out.write("</BlastOutput>\n") 126 out.write("</BlastOutput>\n")
122 out.close() 127 out.close()
123 merge = staticmethod(merge) 128 merge = staticmethod(merge)
124 129
130
class _BlastDb(object):
    """Base class for BLAST database datatypes.

    Provides the peek/display behaviour shared by the BLAST database
    datatypes and stubs out ``merge`` and ``split``, which are not
    supported for BLAST databases.
    """

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text on ``dataset``.

        A fixed description is stored unless ``dataset.dataset.purged`` is
        true, in which case the text reports that the file is gone.
        ``is_multi_byte`` is accepted but not used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "BLAST database (multiple files)"
            dataset.blurb = "BLAST database (multiple files)"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Create HTML content, used for displaying peek.

        Returns ``dataset.peek``; if reading it fails, a generic
        description is returned instead.
        """
        try:
            return dataset.peek
        except Exception:
            # Deliberate best effort, but do not swallow KeyboardInterrupt
            # or SystemExit the way a bare ``except:`` would.
            return "BLAST database (multiple files)"

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Apparently an old display method, but still gets called.

        This allows us to format the data shown in the central pane via the
        "eye" icon. All arguments are ignored; a fixed message is returned.
        """
        return "This is a BLAST database."

    def get_mime(self):
        """Returns the mime type of the datatype (pretend it is text for peek)"""
        return 'text/plain'

    def merge(split_files, output_file):
        """Merge BLAST databases (not implemented for now).

        Always raises NotImplementedError.
        """
        raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")
    # There is no ``self`` parameter, so this must be a static method (the
    # other merge in this file is wrapped the same way); otherwise calls bind
    # the arguments wrongly and fail with TypeError instead.
    merge = staticmethod(merge)

    def split(cls, input_datasets, subdir_generator_function, split_params):
        """Split a BLAST database (not implemented for now).

        Returns None when ``split_params`` is None (nothing to do);
        otherwise raises NotImplementedError.
        """
        if split_params is None:
            return None
        raise NotImplementedError("Can't split BLAST databases")
    # The first parameter is ``cls``, so bind it as a class method.
    split = classmethod(split)
171
172
class BlastNucDb(_BlastDb, Data):
    """Datatype for a nucleotide BLAST database made up of several files."""
    file_ext = 'blastdbn'
    composite_type = 'basic'
    # NOTE(review): no element name is passed here - confirm this is intended.
    MetadataElement(readonly=True, optional=True, visible=False, no_value=0)

    def __init__(self, **kwd):
        """Initialise the datatype and register its component files."""
        Data.__init__(self, **kwd)
        # Components registered without the optional flag.
        for component in ('blastdb.nhr', 'blastdb.nin', 'blastdb.nsq'):
            self.add_composite_file(component)
        # Components registered as optional.
        for component in ('blastdb.nhd', 'blastdb.nsi', 'blastdb.nhi',
                          'blastdb.nog', 'blastdb.nsd'):
            self.add_composite_file(component, optional=True)

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Return the text shown in the central pane via the "eye" icon.

        Apparently an old display method, but it still gets called. None of
        the arguments are used; the message is fixed.
        """
        message = "This is a BLAST nucleotide database."
        return message
197
class BlastProtDb(_BlastDb, Data):
    """Class for protein BLAST database files."""
    file_ext = 'blastdbp'
    composite_type = 'basic'
    # NOTE(review): no element name is passed here - confirm this is intended.
    MetadataElement(readonly=True, optional=True, visible=False, no_value=0)

    def __init__(self, **kwd):
        """Initialise the datatype and register its component files.

        The header (.phr), index (.pin) and sequence (.psq) files are
        registered as required; the remaining components are optional.
        """
        Data.__init__(self, **kwd)
        self.add_composite_file('blastdb.phr')
        self.add_composite_file('blastdb.pin')
        self.add_composite_file('blastdb.psq')
        self.add_composite_file('blastdb.pnd', optional=True)
        self.add_composite_file('blastdb.pni', optional=True)
        self.add_composite_file('blastdb.psd', optional=True)
        self.add_composite_file('blastdb.psi', optional=True)
        # 'blastdb.psq' is deliberately NOT registered a second time as
        # optional: it is already required above, and a repeat registration
        # would presumably replace that entry and make the sequence file
        # optional (verify against add_composite_file).
        self.add_composite_file('blastdb.phd', optional=True)
        self.add_composite_file('blastdb.phi', optional=True)
        self.add_composite_file('blastdb.pog', optional=True)

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Apparently an old display method, but still gets called.

        This allows us to format the data shown in the central pane via the
        "eye" icon. All arguments are ignored; a fixed message is returned.
        """
        return "This is a BLAST protein database."