comparison enhanced_bowtie_wrapper.py @ 31:85becd29b52c draft

Uploaded
author kaymccoy
date Wed, 10 Aug 2016 12:24:08 -0400
parents
children
comparison
equal deleted inserted replaced
30:ca09ade7e699 31:85becd29b52c
1 #!/usr/bin/env python
2
3 """
4 Runs Bowtie on single-end or paired-end data.
5 For use with Bowtie v. 0.12.7
6
7 usage: bowtie_wrapper.py [options]
8 -t, --threads=t: The number of threads to run
9 -o, --output=o: The output file
10 --output_unmapped_reads=: File name for unmapped reads (single-end)
11 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
12 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
13 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
14 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
15 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
19 -2, --paired=2: Whether the data is single- or paired-end
20 -g, --genomeSource=g: The type of reference provided
21 -r, --ref=r: The reference genome to use or index
22 -s, --skip=s: Skip the first n reads
23 -a, --alignLimit=a: Only align the first n reads
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
28 -l, --seedLen=l: Seed length
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
44 -F, --offrate=F: Override the offrate of the index to n
45 -8, --snpphred=8: SNP penalty on Phred scale
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
48 -S, --seed=S: Seed for pseudo-random number generator
49 -C, --params=C: Whether to use default or specified parameters
50 -u, --iautoB=u: Automatic or specified behavior
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
54 -D, --idcv=D: The period for the difference-cover sample
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
62 -x, --indexSettings=x: Whether or not indexing options are to be set
63 -H, --suppressHeader=H: Suppress header
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
65 """
66
67 import optparse, os, shutil, subprocess, sys, tempfile
68
69 #Allow more than Sanger encoded variants
70 DEFAULT_ASCII_ENCODING = '--phred33-quals'
71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
73
74 def stop_err( msg ):
75 sys.stderr.write( '%s\n' % msg )
76 sys.exit()
77
78 def __main__():
79 #Parse Command Line
80 parser = optparse.OptionParser()
81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
82 parser.add_option( '-o', '--output', dest='output', help='The output file' )
83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
85 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
86 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
87 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
88 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
89 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
90 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ or FASTA format' )
91
92
93
94 parser.add_option( '--filetype', dest='filetype', help='The filetype of your input reads - FASTA (f) or FASTQ (q)' )
95 parser.add_option( '--outtype', dest='outtype', help='The filetype of your output (nothing for map or -S for SAM)' )
96
97
98 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
99 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
100 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
101 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
102 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
103 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
104 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
105 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
106 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
107 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
108 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
109 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
110 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
111 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
112 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
113 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
114 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
115 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
116 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
117 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
118 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
119 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
120 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
121 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
122 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
123 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
124 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
125 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
126 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
127 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
128 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
129 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
130 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
131 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
132 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
133 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
134 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
135 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
136 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
137 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
138 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
139 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
140 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
141 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
142 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
143 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
144 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
145 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
146 (options, args) = parser.parse_args()
147 if options.mismatchSeed and options.maxMismatches:
148 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
149 stdout = ''
150
151 # make temp directory for placement of indices and copy reference file there if necessary
152 tmp_index_dir = tempfile.mkdtemp()
153 # get type of data (solid or solexa)
154 if options.dataType == 'solid':
155 colorspace = '-C'
156 else:
157 colorspace = ''
158 # index if necessary
159 if options.genomeSource == 'history' and not options.do_not_build_index:
160 # set up commands
161 if options.index_settings =='indexPreSet':
162 indexing_cmds = '%s' % colorspace
163 else:
164 try:
165 if options.iautoB and options.iautoB == 'set':
166 iautoB = '--noauto'
167 else:
168 iautoB = ''
169 if options.ipacked and options.ipacked == 'packed':
170 ipacked = '--packed'
171 else:
172 ipacked = ''
173 if options.ibmax and int( options.ibmax ) >= 1:
174 ibmax = '--bmax %s' % options.ibmax
175 else:
176 ibmax = ''
177 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
178 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
179 else:
180 ibmaxdivn = ''
181 if options.idcv and int( options.idcv ) >= 3:
182 idcv = '--dcv %s' % options.idcv
183 else:
184 idcv = ''
185 if options.inodc and options.inodc == 'nodc':
186 inodc = '--nodc'
187 else:
188 inodc = ''
189 if options.inoref and options.inoref == 'noref':
190 inoref = '--noref'
191 else:
192 inoref = ''
193 if options.iftab and int( options.iftab ) >= 1:
194 iftab = '--ftabchars %s' % options.iftab
195 else:
196 iftab = ''
197 if options.intoa and options.intoa == 'yes':
198 intoa = '--ntoa'
199 else:
200 intoa = ''
201 if options.iendian and options.iendian == 'big':
202 iendian = '--big'
203 else:
204 iendian = '--little'
205 if options.iseed and int( options.iseed ) > 0:
206 iseed = '--seed %s' % options.iseed
207 else:
208 iseed = ''
209 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
210 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
211 inoref, options.ioffrate, iftab, intoa, iendian,
212 iseed, colorspace )
213 except ValueError, e:
214 # clean up temp dir
215 if os.path.exists( tmp_index_dir ):
216 shutil.rmtree( tmp_index_dir )
217 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
218 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
219 ref_file_name = ref_file.name
220 ref_file.close()
221 os.symlink( options.ref, ref_file_name )
222 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
223 try:
224 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
225 tmp_stderr = open( tmp, 'wb' )
226 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
227 returncode = proc.wait()
228 tmp_stderr.close()
229 # get stderr, allowing for case where it's very large
230 tmp_stderr = open( tmp, 'rb' )
231 stderr = ''
232 buffsize = 1048576
233 try:
234 while True:
235 stderr += tmp_stderr.read( buffsize )
236 if not stderr or len( stderr ) % buffsize != 0:
237 break
238 except OverflowError:
239 pass
240 tmp_stderr.close()
241 if returncode != 0:
242 raise Exception, stderr
243 except Exception, e:
244 # clean up temp dir
245 if os.path.exists( tmp_index_dir ):
246 shutil.rmtree( tmp_index_dir )
247 stop_err( 'Error indexing reference sequence\n' + str( e ) )
248 stdout += 'File indexed. '
249 else:
250 ref_file_name = options.ref
251 # set up aligning and generate aligning command options
252 # automatically set threads in both cases
253 tmp_suppressed_file_name = None
254 tmp_unmapped_file_name = None
255 if options.suppressHeader == 'true':
256 suppressHeader = '--sam-nohead'
257 else:
258 suppressHeader = ''
259 if options.maxInsert and int( options.maxInsert ) > 0:
260 maxInsert = '-X %s' % options.maxInsert
261 else:
262 maxInsert = ''
263 if options.mateOrient:
264 mateOrient = '--%s' % options.mateOrient
265 else:
266 mateOrient = ''
267 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
268 if options.params == 'preSet':
269 aligning_cmds = '-f %s %s -p %s -S %s %s %s ' % \
270 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
271
272 else:
273 try:
274 if options.skip and int( options.skip ) > 0:
275 skip = '-s %s' % options.skip
276 else:
277 skip = ''
278 if options.alignLimit and int( options.alignLimit ) >= 0:
279 alignLimit = '-u %s' % options.alignLimit
280 else:
281 alignLimit = ''
282 if options.trimH and int( options.trimH ) > 0:
283 trimH = '-5 %s' % options.trimH
284 else:
285 trimH = ''
286 if options.trimL and int( options.trimL ) > 0:
287 trimL = '-3 %s' % options.trimL
288 else:
289 trimL = ''
290 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \
291 or options.maxMismatches == '2' or options.maxMismatches == '3'):
292 maxMismatches = '-v %s' % options.maxMismatches
293 else:
294 maxMismatches = ''
295 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
296 or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
297 mismatchSeed = '-n %s' % options.mismatchSeed
298 else:
299 mismatchSeed = ''
300 if options.mismatchQual and int( options.mismatchQual ) >= 1:
301 mismatchQual = '-e %s' % options.mismatchQual
302 else:
303 mismatchQual = ''
304 if options.seedLen and int( options.seedLen ) >= 5:
305 seedLen = '-l %s' % options.seedLen
306 else:
307 seedLen = ''
308 if options.rounding == 'noRound':
309 rounding = '--nomaqround'
310 else:
311 rounding = ''
312 if options.minInsert and int( options.minInsert ) > 0:
313 minInsert = '-I %s' % options.minInsert
314 else:
315 minInsert = ''
316 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
317 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
318 else:
319 maxAlignAttempt = ''
320 if options.forwardAlign == 'noForward':
321 forwardAlign = '--nofw'
322 else:
323 forwardAlign = ''
324 if options.reverseAlign == 'noReverse':
325 reverseAlign = '--norc'
326 else:
327 reverseAlign = ''
328 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
329 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
330 maxBacktracks = '--maxbts %s' % options.maxBacktracks
331 else:
332 maxBacktracks = ''
333 if options.tryHard == 'doTryHard':
334 tryHard = '-y'
335 else:
336 tryHard = ''
337 if options.valAlign and int( options.valAlign ) >= 0:
338 valAlign = '-k %s' % options.valAlign
339 else:
340 valAlign = ''
341 if options.allValAligns == 'doAllValAligns':
342 allValAligns = '-a'
343 else:
344 allValAligns = ''
345 if options.suppressAlign and int( options.suppressAlign ) >= 0:
346 suppressAlign = '-m %s' % options.suppressAlign
347 else:
348 suppressAlign = ''
349 if options.best == 'doBest':
350 best = '--best'
351 else:
352 best = ''
353 if options.strata == 'doStrata':
354 strata = '--strata'
355 else:
356 strata = ''
357 if options.offrate and int( options.offrate ) >= 0:
358 offrate = '-o %s' % options.offrate
359 else:
360 offrate = ''
361 if options.seed and int( options.seed ) >= 0:
362 seed = '--seed %s' % options.seed
363 else:
364 seed = ''
365 if options.paired == 'paired':
366 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
367 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
368 tmp_unmapped_file_name = tmp_unmapped_file.name
369 tmp_unmapped_file.close()
370 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
371 else:
372 output_unmapped_reads = ''
373 if options.output_suppressed_reads:
374 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
375 tmp_suppressed_file_name = tmp_suppressed_file.name
376 tmp_suppressed_file.close()
377 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
378 else:
379 output_suppressed_reads = ''
380 else:
381 if options.output_unmapped_reads:
382 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
383 else:
384 output_unmapped_reads = ''
385 if options.output_suppressed_reads:
386 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
387 else:
388 output_suppressed_reads = ''
389 snpfrac = ''
390 if options.snpphred and int( options.snpphred ) >= 0:
391 snpphred = '--snpphred %s' % options.snpphred
392 else:
393 snpphred = ''
394 if options.snpfrac and float( options.snpfrac ) >= 0:
395 snpfrac = '--snpfrac %s' % options.snpfrac
396 if options.keepends and options.keepends == 'doKeepends':
397 keepends = '--col-keepends'
398 else:
399 keepends = ''
400
401
402
403
404
405 if options.filetype == 'f':
406 filetype = '-f'
407 else:
408 filetype = '-q'
409
410 if options.outtype == 'S':
411 outtype = '-S'
412 else:
413 outtype = ''
414
415
416 aligning_cmds = '%s %s %s -p %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \
417 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
418 ( filetype, maxInsert, mateOrient, options.threads, outtype, suppressHeader,
419 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
420 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
421 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
422 tryHard, valAlign, allValAligns, suppressAlign, best,
423 strata, offrate, seed, snpphred, snpfrac, keepends,
424 output_unmapped_reads, output_suppressed_reads,
425 quality_score_encoding )
426
427
428
429
430
431
432 except ValueError, e:
433 # clean up temp dir
434 if os.path.exists( tmp_index_dir ):
435 shutil.rmtree( tmp_index_dir )
436 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
437 try:
438 # have to nest try-except in try-finally to handle 2.4
439 try:
440 # prepare actual mapping commands
441 if options.paired == 'paired':
442 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
443 else:
444 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
445 # align
446 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
447 tmp_stderr = open( tmp, 'wb' )
448 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
449 returncode = proc.wait()
450 tmp_stderr.close()
451 # get stderr, allowing for case where it's very large
452 tmp_stderr = open( tmp, 'rb' )
453 stderr = ''
454 buffsize = 1048576
455 try:
456 while True:
457 stderr += tmp_stderr.read( buffsize )
458 if not stderr or len( stderr ) % buffsize != 0:
459 break
460 except OverflowError:
461 pass
462 tmp_stderr.close()
463 if returncode != 0:
464 raise Exception, stderr
465 # get suppressed and unmapped reads output files in place if appropriate
466 if options.paired == 'paired' and tmp_suppressed_file_name and \
467 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
468 try:
469 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
470 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
471 shutil.move( left, options.output_suppressed_reads_l )
472 shutil.move( right, options.output_suppressed_reads_r )
473 except Exception, e:
474 sys.stdout.write( 'Error producing the suppressed output file.\n' )
475 if options.paired == 'paired' and tmp_unmapped_file_name and \
476 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
477 try:
478 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
479 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
480 shutil.move( left, options.output_unmapped_reads_l )
481 shutil.move( right, options.output_unmapped_reads_r )
482 except Exception, e:
483 sys.stdout.write( 'Error producing the unmapped output file.\n' )
484 # check that there are results in the output file
485 if os.path.getsize( options.output ) == 0:
486 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
487 except Exception, e:
488 stop_err( 'Error aligning sequence. ' + str( e ) )
489 finally:
490 # clean up temp dir
491 if os.path.exists( tmp_index_dir ):
492 shutil.rmtree( tmp_index_dir )
493 stdout += 'Sequence file aligned.\n'
494 sys.stdout.write( stdout )
495
496 if __name__ == "__main__":
497 __main__()