Mercurial > repos > matt-shirley > ncbi_sra_toolkit
annotate sra.py @ 10:16f96ab0196b
update to current toolkit version
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Tue, 03 Feb 2015 13:40:03 -0500 |
parents | 558a88cd49e4 |
children | ce2bb7841f17 |
rev | line source |
---|---|
3 | 1 """ |
2 NCBI sra class | |
3 """ | |
4 import logging | |
5 import binascii | |
6
e4c21444a3ba
Add sniffer, clean up imports, register sniffable datatype.
Matt Shirley <mdshw5@gmail.com>
parents:
3
diff
changeset
|
6 from galaxy.datatypes.data import nice_size |
e4c21444a3ba
Add sniffer, clean up imports, register sniffable datatype.
Matt Shirley <mdshw5@gmail.com>
parents:
3
diff
changeset
|
7 from galaxy.datatypes.binary import Binary |
3 | 8 |
9 log = logging.getLogger(__name__) | |
10 | |
6
e4c21444a3ba
Add sniffer, clean up imports, register sniffable datatype.
Matt Shirley <mdshw5@gmail.com>
parents:
3
diff
changeset
|
class Sra(Binary):
    """ Sequence Read Archive (SRA) datatype.

    Binary datatype identified by the 8-byte magic string 'NCBI.sra' at the
    start of the file.
    """
    file_ext = 'sra'

    def __init__(self, **kwd):
        """ Forward all keyword arguments unchanged to the Binary base class. """
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        """ Return True if the file at `filename` starts with the SRA magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary.
        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any error while opening or reading the file is treated as "not an
        sra file" and yields False rather than propagating.
        """
        try:
            # Read in binary mode so the header is compared as raw bytes (no
            # text decoding), and use a context manager so the handle is
            # always closed.
            with open(filename, 'rb') as handle:
                header = handle.read(8)
        except Exception:
            # Best effort: an unreadable or missing file is simply not sniffed
            # as sra.
            return False
        return header == b'NCBI.sra'

    def set_peek(self, dataset, is_multi_byte=False):
        """ Set `dataset.peek` and `dataset.blurb` for display.

        For a dataset that has not been purged, the peek is a fixed
        description and the blurb is the human-readable size. For a purged
        dataset, both are set to fixed "missing file" messages.

        `is_multi_byte` is not used by this method.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """ Return the stored peek text for `dataset`.

        If reading `dataset.peek` fails, fall back to a generic description
        that includes the dataset size.
        """
        try:
            return dataset.peek
        except Exception:
            return 'Binary sra file (%s)' % (nice_size(dataset.get_size()))