comparison sra_tools-04cc8176e86f/sra.py @ 0:c386fe82db82

Initial commit, moving from test tool shed.
author matt-shirley
date Wed, 25 Sep 2013 21:00:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c386fe82db82
1 """
2 NCBI sra class
3 """
4 import logging
5 import binascii
6 from galaxy.datatypes.data import *
7 from galaxy.datatypes.sniff import *
8 from galaxy.datatypes.binary import *
9 from galaxy.datatypes.metadata import *
10
11 log = logging.getLogger(__name__)
12
class sra(Binary):
    """Sequence Read Archive (SRA) binary datatype."""

    file_ext = 'sra'

    # Signature expected in the first 8 bytes of an NCBI SRA file.
    _MAGIC = b'NCBI.sra'

    def __init__(self, **kwd):
        """Forward all keyword arguments to the ``Binary`` initializer."""
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        """Return True if the file at ``filename`` starts with the SRA signature.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is
        binary. EBI and DDBJ files may differ, though EBI and DDBJ submissions
        through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any failure to open or read the file is reported as "not an sra file"
        (False) rather than raised.
        """
        try:
            # Binary mode: the header must be compared as raw bytes, and the
            # context manager guarantees the handle is closed.
            with open(filename, 'rb') as handle:
                return handle.read(len(self._MAGIC)) == self._MAGIC
        except Exception:
            # Best-effort check: errors mean "no match". Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            return False

    def set_peek(self, dataset, is_multi_byte=False):
        """Set ``dataset.peek`` and ``dataset.blurb`` for display.

        For a purged dataset both fields get fixed "missing file" messages;
        otherwise the peek is a fixed label and the blurb is the dataset size
        as formatted by ``data.nice_size``. ``is_multi_byte`` is accepted but
        not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = 'Binary sra file'
            dataset.blurb = data.nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a generic label with the size on error."""
        try:
            return dataset.peek
        except Exception:
            # Fall back to a description built from the dataset size when the
            # stored peek cannot be read.
            return 'Binary sra file (%s)' % (data.nice_size(dataset.get_size()))
44
# Register the sniffer only when the Binary class in use provides the
# registration hook; when the attribute is absent, do nothing.
_register_sniffer = getattr(Binary, 'register_sniffable_binary_format', None)
if _register_sniffer is not None:
    _register_sniffer('sra', 'sra', sra)