Mercurial > repos > devteam > bowtie_wrappers
annotate bowtie_wrapper.py @ 4:df86f29bedee draft
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
| author | devteam |
|---|---|
| date | Tue, 21 Jul 2015 13:04:45 -0400 |
| parents | 0c7e4eadfb3c |
| children | 306077e393d4 |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 | |
| 3 """ | |
| 4 Runs Bowtie on single-end or paired-end data. | |
| 5 For use with Bowtie v. 0.12.7 | |
| 6 | |
| 7 usage: bowtie_wrapper.py [options] | |
| 8 -t, --threads=t: The number of threads to run | |
| 9 -o, --output=o: The output file | |
| 10 --output_unmapped_reads=: File name for unmapped reads (single-end) | |
| 11 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end) | |
| 12 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end) | |
| 13 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end) | |
| 14 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end) | |
| 15 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end) | |
| 16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format | |
| 17 -I, --input2=I: The reverse reads file in Sanger FASTQ format | |
| 18 -4, --dataType=4: The type of data (SOLiD or Solexa) | |
| 19 -2, --paired=2: Whether the data is single- or paired-end | |
| 20 -g, --genomeSource=g: The type of reference provided | |
| 21 -r, --ref=r: The reference genome to use or index | |
| 22 -s, --skip=s: Skip the first n reads | |
| 23 -a, --alignLimit=a: Only align the first n reads | |
| 24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment | |
| 25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment | |
| 26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed | |
| 27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions | |
| 28 -l, --seedLen=l: Seed length | |
| 29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30 | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode |
| 0 | 31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
33 -v, --valAlign=v: Report up to n valid alignments per read or pair |
| 0 | 34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist |
| 35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions | |
| 36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read | |
| 37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable | |
| 38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments | |
| 39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments | |
| 40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand | |
| 41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate | |
| 42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand | |
| 43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand | |
| 44 -F, --offrate=F: Override the offrate of the index to n | |
| 45 -8, --snpphred=8: SNP penalty on Phred scale | |
| 46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites | |
| 47 -7, --keepends=7: Keep extreme-end nucleotides and qualities | |
| 48 -S, --seed=S: Seed for pseudo-random number generator | |
| 49 -C, --params=C: Whether to use default or specified parameters | |
| 50 -u, --iautoB=u: Automatic or specified behavior | |
| 51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings | |
| 52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block | |
| 53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference | |
| 54 -D, --idcv=D: The period for the difference-cover sample | |
| 55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample | |
| 56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment | |
| 57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows | |
| 58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query | |
| 59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As | |
| 60 -N, --iendian=N: Endianness to use when serializing integers to the index file | |
| 61 -Z, --iseed=Z: Seed for the pseudorandom number generator | |
| 62 -x, --indexSettings=x: Whether or not indexing options are to be set | |
| 63 -H, --suppressHeader=H: Suppress header | |
| 64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' | |
| 65 """ | |
| 66 | |
| 67 import optparse, os, shutil, subprocess, sys, tempfile | |
| 68 | |
| 69 #Allow more than Sanger encoded variants | |
| 70 DEFAULT_ASCII_ENCODING = '--phred33-quals' | |
| 71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' } | |
| 72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config | |
| 73 | |
| 74 def stop_err( msg ): | |
| 75 sys.stderr.write( '%s\n' % msg ) | |
| 76 sys.exit() | |
| 77 | |
| 78 def __main__(): | |
| 79 #Parse Command Line | |
| 80 parser = optparse.OptionParser() | |
| 81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' ) | |
| 82 parser.add_option( '-o', '--output', dest='output', help='The output file' ) | |
| 83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' ) | |
| 84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' ) | |
| 85 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' ) | |
| 86 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' ) | |
| 87 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' ) | |
| 88 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' ) | |
| 89 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' ) | |
| 90 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) | |
| 91 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) | |
| 92 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' ) | |
| 93 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' ) | |
| 94 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' ) | |
| 95 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' ) | |
| 96 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' ) | |
| 97 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' ) | |
| 98 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' ) | |
| 99 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' ) | |
| 100 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' ) | |
| 101 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' ) | |
| 102 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' ) | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
103 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' ) |
| 0 | 104 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' ) |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
105 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' ) |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
106 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' ) |
| 0 | 107 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' ) |
| 108 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" ) | |
| 109 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' ) | |
| 110 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' ) | |
| 111 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' ) | |
| 112 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' ) | |
| 113 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' ) | |
| 114 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' ) | |
| 115 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' ) | |
| 116 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' ) | |
| 117 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' ) | |
| 118 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' ) | |
| 119 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' ) | |
| 120 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' ) | |
| 121 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' ) | |
| 122 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' ) | |
| 123 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' ) | |
| 124 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' ) | |
| 125 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' ) | |
| 126 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' ) | |
| 127 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' ) | |
| 128 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' ) | |
| 129 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' ) | |
| 130 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' ) | |
| 131 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' ) | |
| 132 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' ) | |
| 133 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' ) | |
| 134 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' ) | |
| 135 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' ) | |
| 136 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' ) | |
| 137 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' ) | |
| 138 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' ) | |
| 139 (options, args) = parser.parse_args() | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
140 if options.mismatchSeed and options.maxMismatches: |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
141 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive") |
| 0 | 142 stdout = '' |
| 143 | |
| 144 # make temp directory for placement of indices and copy reference file there if necessary | |
| 145 tmp_index_dir = tempfile.mkdtemp() | |
| 146 # get type of data (solid or solexa) | |
| 147 if options.dataType == 'solid': | |
| 148 colorspace = '-C' | |
| 149 else: | |
| 150 colorspace = '' | |
| 151 # index if necessary | |
| 152 if options.genomeSource == 'history' and not options.do_not_build_index: | |
| 153 # set up commands | |
| 154 if options.index_settings =='indexPreSet': | |
| 155 indexing_cmds = '%s' % colorspace | |
| 156 else: | |
| 157 try: | |
| 158 if options.iautoB and options.iautoB == 'set': | |
| 159 iautoB = '--noauto' | |
| 160 else: | |
| 161 iautoB = '' | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
162 if options.ipacked and options.ipacked == 'packed': |
| 0 | 163 ipacked = '--packed' |
| 164 else: | |
| 165 ipacked = '' | |
| 166 if options.ibmax and int( options.ibmax ) >= 1: | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
167 ibmax = '--bmax %s' % options.ibmax |
| 0 | 168 else: |
| 169 ibmax = '' | |
| 170 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0: | |
| 171 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn | |
| 172 else: | |
| 173 ibmaxdivn = '' | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
174 if options.idcv and int( options.idcv ) >= 3: |
| 0 | 175 idcv = '--dcv %s' % options.idcv |
| 176 else: | |
| 177 idcv = '' | |
| 178 if options.inodc and options.inodc == 'nodc': | |
| 179 inodc = '--nodc' | |
| 180 else: | |
| 181 inodc = '' | |
| 182 if options.inoref and options.inoref == 'noref': | |
| 183 inoref = '--noref' | |
| 184 else: | |
| 185 inoref = '' | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
186 if options.iftab and int( options.iftab ) >= 1: |
| 0 | 187 iftab = '--ftabchars %s' % options.iftab |
| 188 else: | |
| 189 iftab = '' | |
| 190 if options.intoa and options.intoa == 'yes': | |
| 191 intoa = '--ntoa' | |
| 192 else: | |
| 193 intoa = '' | |
| 194 if options.iendian and options.iendian == 'big': | |
| 195 iendian = '--big' | |
| 196 else: | |
| 197 iendian = '--little' | |
| 198 if options.iseed and int( options.iseed ) > 0: | |
| 199 iseed = '--seed %s' % options.iseed | |
| 200 else: | |
| 201 iseed = '' | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
202 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \ |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
203 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
204 inoref, options.ioffrate, iftab, intoa, iendian, |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
205 iseed, colorspace ) |
| 0 | 206 except ValueError, e: |
| 207 # clean up temp dir | |
| 208 if os.path.exists( tmp_index_dir ): | |
| 209 shutil.rmtree( tmp_index_dir ) | |
| 210 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) ) | |
| 211 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir ) | |
| 212 ref_file_name = ref_file.name | |
| 213 ref_file.close() | |
| 214 os.symlink( options.ref, ref_file_name ) | |
| 215 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name ) | |
| 216 try: | |
| 217 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name | |
| 218 tmp_stderr = open( tmp, 'wb' ) | |
| 219 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) | |
| 220 returncode = proc.wait() | |
| 221 tmp_stderr.close() | |
| 222 # get stderr, allowing for case where it's very large | |
| 223 tmp_stderr = open( tmp, 'rb' ) | |
| 224 stderr = '' | |
| 225 buffsize = 1048576 | |
| 226 try: | |
| 227 while True: | |
| 228 stderr += tmp_stderr.read( buffsize ) | |
| 229 if not stderr or len( stderr ) % buffsize != 0: | |
| 230 break | |
| 231 except OverflowError: | |
| 232 pass | |
| 233 tmp_stderr.close() | |
| 234 if returncode != 0: | |
| 235 raise Exception, stderr | |
| 236 except Exception, e: | |
| 237 # clean up temp dir | |
| 238 if os.path.exists( tmp_index_dir ): | |
| 239 shutil.rmtree( tmp_index_dir ) | |
| 240 stop_err( 'Error indexing reference sequence\n' + str( e ) ) | |
| 241 stdout += 'File indexed. ' | |
| 242 else: | |
| 243 ref_file_name = options.ref | |
| 244 # set up aligning and generate aligning command options | |
| 245 # automatically set threads in both cases | |
| 246 tmp_suppressed_file_name = None | |
| 247 tmp_unmapped_file_name = None | |
| 248 if options.suppressHeader == 'true': | |
| 249 suppressHeader = '--sam-nohead' | |
| 250 else: | |
| 251 suppressHeader = '' | |
| 252 if options.maxInsert and int( options.maxInsert ) > 0: | |
| 253 maxInsert = '-X %s' % options.maxInsert | |
| 254 else: | |
| 255 maxInsert = '' | |
| 256 if options.mateOrient: | |
| 257 mateOrient = '--%s' % options.mateOrient | |
| 258 else: | |
| 259 mateOrient = '' | |
| 260 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING ) | |
| 261 if options.params == 'preSet': | |
| 262 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \ | |
| 263 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding ) | |
| 264 else: | |
| 265 try: | |
| 266 if options.skip and int( options.skip ) > 0: | |
| 267 skip = '-s %s' % options.skip | |
| 268 else: | |
| 269 skip = '' | |
| 270 if options.alignLimit and int( options.alignLimit ) >= 0: | |
| 271 alignLimit = '-u %s' % options.alignLimit | |
| 272 else: | |
| 273 alignLimit = '' | |
| 274 if options.trimH and int( options.trimH ) > 0: | |
| 275 trimH = '-5 %s' % options.trimH | |
| 276 else: | |
| 277 trimH = '' | |
| 278 if options.trimL and int( options.trimL ) > 0: | |
| 279 trimL = '-3 %s' % options.trimL | |
| 280 else: | |
| 281 trimL = '' | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
282 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \ |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
283 or options.maxMismatches == '2' or options.maxMismatches == '3'): |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
284 maxMismatches = '-v %s' % options.maxMismatches |
| 0 | 285 else: |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
286 maxMismatches = '' |
| 0 | 287 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ |
| 288 or options.mismatchSeed == '2' or options.mismatchSeed == '3'): | |
| 289 mismatchSeed = '-n %s' % options.mismatchSeed | |
| 290 else: | |
| 291 mismatchSeed = '' | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
292 if options.mismatchQual and int( options.mismatchQual ) >= 1: |
| 0 | 293 mismatchQual = '-e %s' % options.mismatchQual |
| 294 else: | |
| 295 mismatchQual = '' | |
| 296 if options.seedLen and int( options.seedLen ) >= 5: | |
| 297 seedLen = '-l %s' % options.seedLen | |
| 298 else: | |
| 299 seedLen = '' | |
| 300 if options.rounding == 'noRound': | |
| 301 rounding = '--nomaqround' | |
| 302 else: | |
| 303 rounding = '' | |
| 304 if options.minInsert and int( options.minInsert ) > 0: | |
| 305 minInsert = '-I %s' % options.minInsert | |
| 306 else: | |
| 307 minInsert = '' | |
| 308 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0: | |
| 309 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt | |
| 310 else: | |
| 311 maxAlignAttempt = '' | |
| 312 if options.forwardAlign == 'noForward': | |
| 313 forwardAlign = '--nofw' | |
| 314 else: | |
| 315 forwardAlign = '' | |
| 316 if options.reverseAlign == 'noReverse': | |
| 317 reverseAlign = '--norc' | |
| 318 else: | |
| 319 reverseAlign = '' | |
| 320 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \ | |
| 321 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ): | |
| 322 maxBacktracks = '--maxbts %s' % options.maxBacktracks | |
| 323 else: | |
| 324 maxBacktracks = '' | |
| 325 if options.tryHard == 'doTryHard': | |
| 326 tryHard = '-y' | |
| 327 else: | |
| 328 tryHard = '' | |
| 329 if options.valAlign and int( options.valAlign ) >= 0: | |
| 330 valAlign = '-k %s' % options.valAlign | |
| 331 else: | |
| 332 valAlign = '' | |
| 333 if options.allValAligns == 'doAllValAligns': | |
| 334 allValAligns = '-a' | |
| 335 else: | |
| 336 allValAligns = '' | |
| 337 if options.suppressAlign and int( options.suppressAlign ) >= 0: | |
| 338 suppressAlign = '-m %s' % options.suppressAlign | |
| 339 else: | |
| 340 suppressAlign = '' | |
| 341 if options.best == 'doBest': | |
| 342 best = '--best' | |
| 343 else: | |
| 344 best = '' | |
| 345 if options.strata == 'doStrata': | |
| 346 strata = '--strata' | |
| 347 else: | |
| 348 strata = '' | |
| 349 if options.offrate and int( options.offrate ) >= 0: | |
| 350 offrate = '-o %s' % options.offrate | |
| 351 else: | |
| 352 offrate = '' | |
| 353 if options.seed and int( options.seed ) >= 0: | |
| 354 seed = '--seed %s' % options.seed | |
| 355 else: | |
| 356 seed = '' | |
| 357 if options.paired == 'paired': | |
| 358 if options.output_unmapped_reads_l and options.output_unmapped_reads_r: | |
| 359 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' ) | |
| 360 tmp_unmapped_file_name = tmp_unmapped_file.name | |
| 361 tmp_unmapped_file.close() | |
| 362 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name | |
| 363 else: | |
| 364 output_unmapped_reads = '' | |
| 365 if options.output_suppressed_reads: | |
| 366 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' ) | |
| 367 tmp_suppressed_file_name = tmp_suppressed_file.name | |
| 368 tmp_suppressed_file.close() | |
| 369 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name | |
| 370 else: | |
| 371 output_suppressed_reads = '' | |
| 372 else: | |
| 373 if options.output_unmapped_reads: | |
| 374 output_unmapped_reads = '--un %s' % options.output_unmapped_reads | |
| 375 else: | |
| 376 output_unmapped_reads = '' | |
| 377 if options.output_suppressed_reads: | |
| 378 output_suppressed_reads = '--max %s' % options.output_suppressed_reads | |
| 379 else: | |
| 380 output_suppressed_reads = '' | |
| 381 snpfrac = '' | |
| 382 if options.snpphred and int( options.snpphred ) >= 0: | |
| 383 snpphred = '--snpphred %s' % options.snpphred | |
| 384 else: | |
| 385 snpphred = '' | |
| 386 if options.snpfrac and float( options.snpfrac ) >= 0: | |
| 387 snpfrac = '--snpfrac %s' % options.snpfrac | |
| 388 if options.keepends and options.keepends == 'doKeepends': | |
| 389 keepends = '--col-keepends' | |
| 390 else: | |
| 391 keepends = '' | |
| 392 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \ | |
| 393 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \ | |
| 394 ( maxInsert, mateOrient, options.threads, suppressHeader, | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
395 colorspace, skip, alignLimit, trimH, trimL, maxMismatches, |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
396 mismatchSeed, mismatchQual, seedLen, rounding, minInsert, |
| 0 | 397 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks, |
| 398 tryHard, valAlign, allValAligns, suppressAlign, best, | |
| 399 strata, offrate, seed, snpphred, snpfrac, keepends, | |
| 400 output_unmapped_reads, output_suppressed_reads, | |
| 401 quality_score_encoding ) | |
| 402 except ValueError, e: | |
| 403 # clean up temp dir | |
| 404 if os.path.exists( tmp_index_dir ): | |
| 405 shutil.rmtree( tmp_index_dir ) | |
| 406 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) | |
| 407 try: | |
| 408 # have to nest try-except in try-finally to handle 2.4 | |
| 409 try: | |
| 410 # prepare actual mapping commands | |
| 411 if options.paired == 'paired': | |
| 412 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output ) | |
| 413 else: | |
| 414 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output ) | |
| 415 # align | |
| 416 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name | |
| 417 tmp_stderr = open( tmp, 'wb' ) | |
| 418 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) | |
| 419 returncode = proc.wait() | |
| 420 tmp_stderr.close() | |
| 421 # get stderr, allowing for case where it's very large | |
| 422 tmp_stderr = open( tmp, 'rb' ) | |
| 423 stderr = '' | |
| 424 buffsize = 1048576 | |
| 425 try: | |
| 426 while True: | |
| 427 stderr += tmp_stderr.read( buffsize ) | |
| 428 if not stderr or len( stderr ) % buffsize != 0: | |
| 429 break | |
| 430 except OverflowError: | |
| 431 pass | |
| 432 tmp_stderr.close() | |
| 433 if returncode != 0: | |
| 434 raise Exception, stderr | |
| 435 # get suppressed and unmapped reads output files in place if appropriate | |
| 436 if options.paired == 'paired' and tmp_suppressed_file_name and \ | |
| 437 options.output_suppressed_reads_l and options.output_suppressed_reads_r: | |
| 438 try: | |
| 439 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) | |
| 440 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) | |
| 441 shutil.move( left, options.output_suppressed_reads_l ) | |
| 442 shutil.move( right, options.output_suppressed_reads_r ) | |
| 443 except Exception, e: | |
| 444 sys.stdout.write( 'Error producing the suppressed output file.\n' ) | |
| 445 if options.paired == 'paired' and tmp_unmapped_file_name and \ | |
| 446 options.output_unmapped_reads_l and options.output_unmapped_reads_r: | |
| 447 try: | |
| 448 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' ) | |
| 449 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' ) | |
| 450 shutil.move( left, options.output_unmapped_reads_l ) | |
| 451 shutil.move( right, options.output_unmapped_reads_r ) | |
| 452 except Exception, e: | |
| 453 sys.stdout.write( 'Error producing the unmapped output file.\n' ) | |
| 454 # check that there are results in the output file | |
| 455 if os.path.getsize( options.output ) == 0: | |
| 456 raise Exception, 'The output file is empty, there may be an error with your input file or settings.' | |
| 457 except Exception, e: | |
| 458 stop_err( 'Error aligning sequence. ' + str( e ) ) | |
| 459 finally: | |
| 460 # clean up temp dir | |
| 461 if os.path.exists( tmp_index_dir ): | |
| 462 shutil.rmtree( tmp_index_dir ) | |
| 463 stdout += 'Sequence file aligned.\n' | |
| 464 sys.stdout.write( stdout ) | |
| 465 | |
|
4
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
466 if __name__ == "__main__": |
|
df86f29bedee
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
467 __main__() |
