Mercurial > repos > devteam > bowtie_wrappers
comparison bowtie_wrapper.py @ 4:df86f29bedee draft
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
author | devteam |
---|---|
date | Tue, 21 Jul 2015 13:04:45 -0400 |
parents | 0c7e4eadfb3c |
children | 306077e393d4 |
comparison
equal
deleted
inserted
replaced
3:9ca609a2a421 | 4:df86f29bedee |
---|---|
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment | 25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment |
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed | 26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed |
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions | 27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions |
28 -l, --seedLen=l: Seed length | 28 -l, --seedLen=l: Seed length |
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30 | 29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30 |
30 -P, --maqSoapAlign=P: Choose MAQ- or SOAP-like alignment policy | 30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode |
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist | 31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist |
32 -v, --valAlign=v: Report up to n valid arguments per read | 32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair |
33 -V, --allValAligns=V: Whether or not to report all valid alignments per read | 33 -v, --valAlign=v: Report up to n valid alignments per read or pair |
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist | 34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist |
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions | 35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions |
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read | 36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read |
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable | 37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable |
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments | 38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments |
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows | 57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows |
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query | 58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query |
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As | 59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As |
60 -N, --iendian=N: Endianness to use when serializing integers to the index file | 60 -N, --iendian=N: Endianness to use when serializing integers to the index file |
61 -Z, --iseed=Z: Seed for the pseudorandom number generator | 61 -Z, --iseed=Z: Seed for the pseudorandom number generator |
62 -c, --icutoff=c: Number of first bases of the reference sequence to index | |
63 -x, --indexSettings=x: Whether or not indexing options are to be set | 62 -x, --indexSettings=x: Whether or not indexing options are to be set |
64 -H, --suppressHeader=H: Suppress header | 63 -H, --suppressHeader=H: Suppress header |
65 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' | 64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' |
66 """ | 65 """ |
67 | 66 |
99 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' ) | 98 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' ) |
100 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' ) | 99 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' ) |
101 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' ) | 100 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' ) |
102 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' ) | 101 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' ) |
103 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' ) | 102 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' ) |
104 parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' ) | 103 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' ) |
105 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' ) | 104 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' ) |
106 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read' ) | 105 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' ) |
107 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' ) | 106 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' ) |
108 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' ) | 107 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' ) |
109 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" ) | 108 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" ) |
110 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' ) | 109 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' ) |
111 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' ) | 110 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' ) |
112 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' ) | 111 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' ) |
131 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' ) | 130 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' ) |
132 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' ) | 131 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' ) |
133 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' ) | 132 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' ) |
134 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' ) | 133 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' ) |
135 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' ) | 134 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' ) |
136 parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' ) | |
137 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' ) | 135 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' ) |
138 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' ) | 136 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' ) |
139 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' ) | 137 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' ) |
140 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' ) | 138 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' ) |
141 (options, args) = parser.parse_args() | 139 (options, args) = parser.parse_args() |
140 if options.mismatchSeed and options.maxMismatches: | |
141 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive") | |
142 stdout = '' | 142 stdout = '' |
143 | 143 |
144 # make temp directory for placement of indices and copy reference file there if necessary | 144 # make temp directory for placement of indices and copy reference file there if necessary |
145 tmp_index_dir = tempfile.mkdtemp() | 145 tmp_index_dir = tempfile.mkdtemp() |
146 # get type of data (solid or solexa) | 146 # get type of data (solid or solexa) |
157 try: | 157 try: |
158 if options.iautoB and options.iautoB == 'set': | 158 if options.iautoB and options.iautoB == 'set': |
159 iautoB = '--noauto' | 159 iautoB = '--noauto' |
160 else: | 160 else: |
161 iautoB = '' | 161 iautoB = '' |
162 if options. ipacked and options.ipacked == 'packed': | 162 if options.ipacked and options.ipacked == 'packed': |
163 ipacked = '--packed' | 163 ipacked = '--packed' |
164 else: | 164 else: |
165 ipacked = '' | 165 ipacked = '' |
166 if options.ibmax and int( options.ibmax ) >= 1: | 166 if options.ibmax and int( options.ibmax ) >= 1: |
167 ibmax = '--bmax %s' % options.ibmax | 167 ibmax = '--bmax %s' % options.ibmax |
168 else: | 168 else: |
169 ibmax = '' | 169 ibmax = '' |
170 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0: | 170 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0: |
171 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn | 171 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn |
172 else: | 172 else: |
173 ibmaxdivn = '' | 173 ibmaxdivn = '' |
174 if options.idcv and int( options.idcv ) > 0: | 174 if options.idcv and int( options.idcv ) >= 3: |
175 idcv = '--dcv %s' % options.idcv | 175 idcv = '--dcv %s' % options.idcv |
176 else: | 176 else: |
177 idcv = '' | 177 idcv = '' |
178 if options.inodc and options.inodc == 'nodc': | 178 if options.inodc and options.inodc == 'nodc': |
179 inodc = '--nodc' | 179 inodc = '--nodc' |
181 inodc = '' | 181 inodc = '' |
182 if options.inoref and options.inoref == 'noref': | 182 if options.inoref and options.inoref == 'noref': |
183 inoref = '--noref' | 183 inoref = '--noref' |
184 else: | 184 else: |
185 inoref = '' | 185 inoref = '' |
186 if options.iftab and int( options.iftab ) >= 0: | 186 if options.iftab and int( options.iftab ) >= 1: |
187 iftab = '--ftabchars %s' % options.iftab | 187 iftab = '--ftabchars %s' % options.iftab |
188 else: | 188 else: |
189 iftab = '' | 189 iftab = '' |
190 if options.intoa and options.intoa == 'yes': | 190 if options.intoa and options.intoa == 'yes': |
191 intoa = '--ntoa' | 191 intoa = '--ntoa' |
197 iendian = '--little' | 197 iendian = '--little' |
198 if options.iseed and int( options.iseed ) > 0: | 198 if options.iseed and int( options.iseed ) > 0: |
199 iseed = '--seed %s' % options.iseed | 199 iseed = '--seed %s' % options.iseed |
200 else: | 200 else: |
201 iseed = '' | 201 iseed = '' |
202 if options.icutoff and int( options.icutoff ) > 0: | 202 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \ |
203 icutoff = '--cutoff %s' % options.icutoff | 203 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, |
204 else: | 204 inoref, options.ioffrate, iftab, intoa, iendian, |
205 icutoff = '' | 205 iseed, colorspace ) |
206 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \ | |
207 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, | |
208 inoref, options.ioffrate, iftab, intoa, iendian, | |
209 iseed, icutoff, colorspace ) | |
210 except ValueError, e: | 206 except ValueError, e: |
211 # clean up temp dir | 207 # clean up temp dir |
212 if os.path.exists( tmp_index_dir ): | 208 if os.path.exists( tmp_index_dir ): |
213 shutil.rmtree( tmp_index_dir ) | 209 shutil.rmtree( tmp_index_dir ) |
214 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) ) | 210 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) ) |
281 trimH = '' | 277 trimH = '' |
282 if options.trimL and int( options.trimL ) > 0: | 278 if options.trimL and int( options.trimL ) > 0: |
283 trimL = '-3 %s' % options.trimL | 279 trimL = '-3 %s' % options.trimL |
284 else: | 280 else: |
285 trimL = '' | 281 trimL = '' |
286 if options.maqSoapAlign != '-1' and int( options.maqSoapAlign ) >= 0: | 282 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \ |
287 maqSoapAlign = '-v %s' % options.maqSoapAlign | 283 or options.maxMismatches == '2' or options.maxMismatches == '3'): |
288 else: | 284 maxMismatches = '-v %s' % options.maxMismatches |
289 maqSoapAlign = '' | 285 else: |
286 maxMismatches = '' | |
290 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ | 287 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ |
291 or options.mismatchSeed == '2' or options.mismatchSeed == '3'): | 288 or options.mismatchSeed == '2' or options.mismatchSeed == '3'): |
292 mismatchSeed = '-n %s' % options.mismatchSeed | 289 mismatchSeed = '-n %s' % options.mismatchSeed |
293 else: | 290 else: |
294 mismatchSeed = '' | 291 mismatchSeed = '' |
295 if options.mismatchQual and int( options.mismatchQual ) >= 0: | 292 if options.mismatchQual and int( options.mismatchQual ) >= 1: |
296 mismatchQual = '-e %s' % options.mismatchQual | 293 mismatchQual = '-e %s' % options.mismatchQual |
297 else: | 294 else: |
298 mismatchQual = '' | 295 mismatchQual = '' |
299 if options.seedLen and int( options.seedLen ) >= 5: | 296 if options.seedLen and int( options.seedLen ) >= 5: |
300 seedLen = '-l %s' % options.seedLen | 297 seedLen = '-l %s' % options.seedLen |
393 else: | 390 else: |
394 keepends = '' | 391 keepends = '' |
395 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \ | 392 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \ |
396 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \ | 393 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \ |
397 ( maxInsert, mateOrient, options.threads, suppressHeader, | 394 ( maxInsert, mateOrient, options.threads, suppressHeader, |
398 colorspace, skip, alignLimit, trimH, trimL, maqSoapAlign, | 395 colorspace, skip, alignLimit, trimH, trimL, maxMismatches, |
399 mismatchSeed, mismatchQual, seedLen, rounding, minInsert, | 396 mismatchSeed, mismatchQual, seedLen, rounding, minInsert, |
400 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks, | 397 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks, |
401 tryHard, valAlign, allValAligns, suppressAlign, best, | 398 tryHard, valAlign, allValAligns, suppressAlign, best, |
402 strata, offrate, seed, snpphred, snpfrac, keepends, | 399 strata, offrate, seed, snpphred, snpfrac, keepends, |
403 output_unmapped_reads, output_suppressed_reads, | 400 output_unmapped_reads, output_suppressed_reads, |
404 quality_score_encoding ) | 401 quality_score_encoding ) |
464 if os.path.exists( tmp_index_dir ): | 461 if os.path.exists( tmp_index_dir ): |
465 shutil.rmtree( tmp_index_dir ) | 462 shutil.rmtree( tmp_index_dir ) |
466 stdout += 'Sequence file aligned.\n' | 463 stdout += 'Sequence file aligned.\n' |
467 sys.stdout.write( stdout ) | 464 sys.stdout.write( stdout ) |
468 | 465 |
469 if __name__=="__main__": __main__() | 466 if __name__ == "__main__": |
467 __main__() |