annotate lib/galaxy/datatypes/rtg.py @ 1:8593828f91e7 default tip

Full galaxy wrapper
author diego
date Sat, 21 Apr 2012 21:36:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
1 """
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
2 rtg datatypes
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
3 """
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
4
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
5 import data
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
6 from galaxy.datatypes import sequence
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
7 import logging, os, sys, time, tempfile, shutil, string, glob, re, subprocess
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
8 import galaxy.model
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
9 from galaxy.datatypes import metadata
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
10 from galaxy.datatypes.metadata import MetadataElement
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
11 from galaxy import util
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
12 from galaxy.datatypes.images import Html
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
13 from galaxy.datatypes.sequence import Sequence
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
14 from galaxy.datatypes.binary import Binary
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
15 from sniff import *
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
16 from pprint import pprint
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
17 from ConfigParser import ConfigParser
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
18
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Directory that contains this module; used as the anchor for locating the
# RTG wrapper configuration below.
basepath = os.path.dirname(__file__)
# Absolute path of tools/rtg/rtg-galaxy.cfg, resolved three directory levels
# above this module's directory.
rtgcfg = os.path.abspath(os.path.join(basepath, "..", "..", "..", "tools", "rtg", "rtg-galaxy.cfg"))
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
22
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
class FakeSecHead(object):
    """
    File-like wrapper that prepends a fake section header to a file.

    The first line handed out is the synthetic header ``[asection]``; every
    later line comes straight from the wrapped file object.  This lets a
    plain ``key=value`` file with no section header be fed to ConfigParser,
    whose options are then found under the section name ``asection``.
    """
    def __init__(self, fp):
        # fp: any object providing a readline() method
        self.fp = fp
        # Pending header line; reset to None once it has been returned.
        self.sechead = '[asection]\n'

    def readline(self):
        """Return the fake header on the first call, then lines read from fp."""
        if self.sechead:
            header, self.sechead = self.sechead, None
            return header
        return self.fp.readline()

    def __iter__(self):
        """
        Iterate over the header followed by the wrapped file's lines.

        Stops when readline() returns an empty string, so the wrapper also
        works with parsers that iterate the file object instead of calling
        readline() themselves.
        """
        return iter(self.readline, '')
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
32
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
# Parse the RTG wrapper configuration once, at import time.  The file is
# wrapped in FakeSecHead so its options end up in the 'asection' section.
cfg = ConfigParser()
_rtgcfg_fp = open(rtgcfg)
try:
    cfg.readfp(FakeSecHead(_rtgcfg_fp))
finally:
    # Close the handle explicitly instead of leaving it to garbage collection.
    _rtgcfg_fp.close()
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
35
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
class Sdf( Html ):
    """
    Composite datatype for an RTG SDF dataset.

    The primary file is an HTML index of the dataset's component files.
    Metadata is filled in by set_meta() from the output of the configured
    ``rtg`` executable's ``sdfstats`` command.
    """
    composite_type = 'auto_primary_file'
    allow_datatype_change = False
    file_ext = 'sdf'

    MetadataElement(name="sdfId", desc="SDF Id", readonly="true", param=metadata.MetadataParameter)
    MetadataElement(name="source", desc="Source", readonly="true", values=[('UNKNOWN', 'Unknown'), ('CG', 'Complete Genomics'), ('SOLEXA', 'Solexa')], param=metadata.SelectParameter)
    MetadataElement(name="sequences", desc="Number of Sequences", readonly="true", param=metadata.MetadataParameter)
    MetadataElement(name="hasQuality", desc="Has Quality", readonly="true", values=[('FALSE', 'False'), ('TRUE', 'True')], param=metadata.SelectParameter)
    MetadataElement(name="type", desc="Type", readonly="true", values=[('DNA', 'DNA'), ('PROTEIN', 'Protein')], param=metadata.SelectParameter)
    MetadataElement(name="paired", desc="Paired-End", readonly="true", values=[('FALSE', 'False'), ('TRUE', 'True')], param=metadata.SelectParameter)
    MetadataElement(name="maxLength", desc="Maximum sequence length", readonly="true", param=metadata.MetadataParameter)
    MetadataElement(name="minLength", desc="Minimum sequence length", readonly="true", param=metadata.MetadataParameter)

    # ( name, mimetype, description, is_binary ) of every composite member,
    # in the order they are registered by __init__.
    _COMPOSITE_FILES = (
        ( 'format.log', 'text/plain', 'Log', False ),
        ( 'done', 'text/plain', 'Completion', False ),
        ( 'progress', 'text/plain', 'Progress', False ),
        ( 'mainIndex', 'application/octet-stream', 'Index', True ),
        ( 'nameIndex0', 'application/octet-stream', 'Index', True ),
        ( 'namedata0', 'application/octet-stream', 'Index', True ),
        ( 'namepointer0', 'application/octet-stream', 'Index', True ),
        ( 'seqdata0', 'application/octet-stream', 'Index', True ),
        ( 'seqpointer0', 'application/octet-stream', 'Index', True ),
    )

    # ( line prefix in the sdfstats output, metadata attribute that receives
    # the text following the first ':' on that line ).
    _STATS_FIELDS = (
        ( 'SDF-ID', 'sdfId' ),
        ( 'Number of sequences', 'sequences' ),
        ( 'Type', 'type' ),
        ( 'Source', 'source' ),
        ( 'Maximum length', 'maxLength' ),
        ( 'Minimum length', 'minLength' ),
    )

    def __init__( self, **kwd ):
        """Initialise the Html base class and register the composite files."""
        Html.__init__( self, **kwd )
        log.debug( "Rtg log info %s" % ' __init__')
        for name, mimetype, description, is_binary in self._COMPOSITE_FILES:
            self.add_composite_file( name, mimetype = mimetype, description = description, substitute_name_with_metadata = None, is_binary = is_binary )

    def generate_primary_file( self, dataset = None ):
        """
        Return the HTML text of an index page listing the composite files.

        Each entry links to the composite file by name, shows its description
        when one is set, and is flagged ' (optional)' when the file is optional.
        """
        log.debug( "Rtg log info %s %s" % ('generate_primary_file',dataset))
        rval = ['<html><head><title>RTG SDF Dataset </title></head><p/>']
        rval.append('<div>This SDF dataset is composed of the following files:<p/><ul>')
        # items() instead of the Python 2 only iteritems()
        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).items():
            fn = composite_name
            log.debug( "Rtg log info %s %s %s" % ('generate_primary_file',fn,composite_file))
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            description = composite_file.get('description')
            if description:
                rval.append( '<li><a href="%s" type="application/octet-stream">%s (%s)</a>%s</li>' % ( fn, fn, description, opt_text ) )
            else:
                rval.append( '<li><a href="%s" type="application/octet-stream">%s</a>%s</li>' % ( fn, fn, opt_text ) )
        rval.append( '</ul></div></html>' )
        return "\n".join( rval )

    def regenerate_primary_file(self,dataset):
        """
        Rewrite the primary file as an HTML list of the entries currently
        present in the dataset's extra files directory.

        This cannot be done until metadata is being set; it is called from
        set_meta().
        """
        log.debug( "Rtg log info %s %s" % ('regenerate_primary_file',dataset))
        bn = dataset.metadata.base_name
        rval = ['<html><head><title>Files for RTG SDF Dataset %s</title></head><p/>Comprises the following files:<p/><ul>' % (bn)]
        for fname in os.listdir(dataset.extra_files_path):
            sfname = os.path.split(fname)[-1]
            rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) )
        rval.append( '</ul></html>' )
        f = open(dataset.file_name,'w')
        try:
            f.write("\n".join( rval ))
            f.write('\n')
        finally:
            # Always release the handle, even if a write fails.
            f.close()

    def set_meta( self, dataset, **kwd ):
        """
        Regenerate the primary file and set the SDF metadata.

        ``paired`` is 'TRUE' when the extra files directory contains a
        ``left`` sub-directory (statistics are then read from that
        sub-directory), otherwise 'FALSE'.  The remaining elements are parsed
        from the output of ``<rtg> sdfstats <dir>``, where ``<rtg>`` is the
        ``rtg`` option of the parsed configuration.
        """
        Html.set_meta( self, dataset, **kwd )
        self.regenerate_primary_file(dataset)
        left_dir = os.path.join(dataset.extra_files_path, 'left')
        if os.path.isdir(left_dir):
            sdf_dir = left_dir
            dataset.metadata.paired = 'TRUE'
        else:
            sdf_dir = dataset.extra_files_path
            dataset.metadata.paired = 'FALSE'
        # Tracked locally so a value left over from an earlier run cannot
        # survive when the current output has no quality line.
        has_quality = 'FALSE'
        # NOTE(review): the directory is interpolated into a shell command
        # unquoted, as before; paths containing shell metacharacters will break.
        stats = os.popen(cfg.get('asection', 'rtg') + ' sdfstats ' + sdf_dir,"r")
        try:
            for line in stats:
                if line.startswith('Quality scores available'):
                    has_quality = 'TRUE'
                    continue
                for prefix, attr in self._STATS_FIELDS:
                    if line.startswith(prefix):
                        head, sep, value = line.partition(':')
                        # Skip a matching line without ':' instead of
                        # failing with an IndexError.
                        if sep:
                            setattr(dataset.metadata, attr, value.strip())
                        break
        finally:
            # Close the pipe so the child process is reaped.
            stats.close()
        dataset.metadata.hasQuality = has_quality
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
127
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
if __name__ == '__main__':
    # Run this module's doctests when it is executed as a script.
    import doctest
    import sys
    doctest.testmod(sys.modules[__name__])
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
131
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
class Cgtsv ( Sequence ):
    """Class representing a generic CG TSV sequence"""
    file_ext = "tsvcg"

    def set_meta( self, dataset, **kwd ):
        """
        Set the number of sequences in dataset.

        Every non-blank line that does not start with '#' or '>' counts as
        one sequence.  When the dataset is larger than
        max_optional_metadata_filesize (and that limit is not negative) the
        count is skipped and ``sequences`` is set to None.
        """
        if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
            dataset.metadata.sequences = None
            return
        sequences = 0
        fh = open( dataset.file_name )
        try:
            for line in fh:
                line = line.strip()
                # NOTE(review): blank lines are not counted; the original
                # nesting was lost in extraction - confirm against the repo.
                if not line:
                    continue
                if line.startswith( '#' ) or line.startswith( '>' ):
                    # We don't count comment lines for sequence data types
                    continue
                sequences += 1
        finally:
            fh.close()
        dataset.metadata.sequences = sequences

    def sniff ( self, filename ):
        """
        Determines whether the file is in CG TSV format
        For details, see http://media.completegenomics.com/documents/DataFileFormats.pdf

        Checks applied to the tab-split rows returned by get_headers():
        at least two rows must be present; rows starting with '#' are
        skipped; every other row with more than one field and a non-empty
        first field must have exactly three fields.  A row starting with '>'
        must begin with the fields '>flags' and 'reads'; any other row must
        have an integer first field and a second field made up only of the
        letters N, G, T, A and C.  Returns True once five such data rows have
        passed, or when the rows run out without a failure.
        """
        # End-anchored: the unanchored "^[NGTAC]*" matched every string
        # (zero-length match), so the base check could never fail.
        bases_regexp = re.compile( r"^[NGTAC]*$" )
        headers = get_headers( filename, '\t' )
        try:
            count = 0
            if len(headers) < 2:
                return False
            for hdr in headers:
                if len( hdr ) > 1 and hdr[0]:
                    if hdr[0].startswith( '#' ):
                        continue
                    if len(hdr) != 3:
                        return False
                    if hdr[0].startswith( '>' ):
                        if hdr[0] != ">flags":
                            return False
                        if hdr[1] != "reads":
                            return False
                    else:
                        try:
                            # int() rather than map(): map is lazy on
                            # Python 3 and would validate nothing.
                            int( hdr[0] )
                        except ValueError:
                            return False
                        if not bases_regexp.match(hdr[1]):
                            return False
                        # NOTE(review): counting only data rows here; the
                        # original nesting of the counter was lost in
                        # extraction - confirm against the repo.
                        count += 1
                        if count >= 5:
                            return True
        except Exception:
            # Best effort: anything unexpected means "not this format".
            return False
        # If we haven't yet returned False, then...
        return True
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
190
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
class Samix( Binary ):
    """Class describing a tabix-ed SAM file"""
    file_ext = "sam.gz"
    MetadataElement( name="sam_index", desc="SAM Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True )

    def init_meta( self, dataset, copy_from=None ):
        """Delegate metadata initialisation to Binary."""
        Binary.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """
        Creates the index for the SAM file.

        Runs ``<rtg> index -f sam <dataset file>`` (``<rtg>`` being the
        ``rtg`` option of the parsed configuration), then moves the
        resulting ``<dataset file>.tbi`` into the ``sam_index`` metadata
        file, creating that metadata file first when none exists.

        Raises Exception when the command exits non-zero and wrote to
        stderr.  Stderr output from a successful run is printed.
        """
        # These metadata values are not accessible by users, always overwrite
        index_file = dataset.metadata.sam_index
        if not index_file:
            index_file = dataset.metadata.spec['sam_index'].param.new_file( dataset = dataset )
        # Create the Sam index
        command = cfg.get('asection', 'rtg') + (' index -f sam %s' % ( dataset.file_name))
        # Capture stderr through a pipe; no temporary file has to be
        # created, re-opened by name and cleaned up on every exit path.
        proc = subprocess.Popen( args=command, shell=True, stderr=subprocess.PIPE, universal_newlines=True )
        stderr = proc.communicate()[1].strip()
        exit_code = proc.returncode
        # Did index succeed?
        if stderr:
            if exit_code != 0:
                raise Exception( "Error Setting tabix-ed SAM Metadata: %s" % stderr )
            print( stderr )
        shutil.move(dataset.file_name + '.tbi', index_file.file_name)
        dataset.metadata.sam_index = index_file

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set a fixed peek text and a size blurb, or purge notices if purged."""
        if not dataset.dataset.purged:
            dataset.peek = "Tabix-ed sam alignments file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return the stored peek, or a generated description if that fails."""
        try:
            return dataset.peek
        except Exception:
            return "Tabix-ed sam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) )
8593828f91e7 Full galaxy wrapper
diego
parents:
diff changeset
239