# HG changeset patch # User kaymccoy # Date 1470621648 14400 # Node ID 43a534fe1cde49b0053087c4b1fd44750fca3743 # Parent 4901e45ab80d5759344dc1efcfe4d7ff11f4589f Deleted selected files diff -r 4901e45ab80d -r 43a534fe1cde enhanced_bowtie_wrapper 1.0.0.py --- a/enhanced_bowtie_wrapper 1.0.0.py Sun Aug 07 21:59:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,497 +0,0 @@ -#!/usr/bin/env python - -""" -Runs Bowtie on single-end or paired-end data. -For use with Bowtie v. 0.12.7 - -usage: bowtie_wrapper.py [options] - -t, --threads=t: The number of threads to run - -o, --output=o: The output file - --output_unmapped_reads=: File name for unmapped reads (single-end) - --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end) - --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end) - --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end) - --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end) - --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end) - -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format - -I, --input2=I: The reverse reads file in Sanger FASTQ format - -4, --dataType=4: The type of data (SOLiD or Solexa) - -2, --paired=2: Whether the data is single- or paired-end - -g, --genomeSource=g: The type of reference provided - -r, --ref=r: The reference genome to use or index - -s, --skip=s: Skip the first n reads - -a, --alignLimit=a: Only align the first n reads - -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment - -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment - -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed - -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions - -l, --seedLen=l: Seed length - -n, 
--rounding=n: Whether or not to round to the nearest 10 and saturating at 30 - -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode - -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist - -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair - -v, --valAlign=v: Report up to n valid alignments per read or pair - -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist - -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions - -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read - -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable - -j, --minInsert=j: Minimum insert size for valid paired-end alignments - -J, --maxInsert=J: Maximum insert size for valid paired-end alignments - -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand - -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate - -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand - -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand - -F, --offrate=F: Override the offrate of the index to n - -8, --snpphred=8: SNP penalty on Phred scale - -6, --snpfrac=6: Fraction of sites expected to be SNP sites - -7, --keepends=7: Keep extreme-end nucleotides and qualities - -S, --seed=S: Seed for pseudo-random number generator - -C, --params=C: Whether to use default or specified parameters - -u, --iautoB=u: Automatic or specified behavior - -K, --ipacked=K: Whether or not to use a packed 
representation for DNA strings - -Q, --ibmax=Q: Maximum number of suffixes allowed in a block - -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference - -D, --idcv=D: The period for the difference-cover sample - -U, --inodc=U: Whether or not to disable the use of the difference-cover sample - -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment - -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows - -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query - -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As - -N, --iendian=N: Endianness to use when serializing integers to the index file - -Z, --iseed=Z: Seed for the pseudorandom number generator - -x, --indexSettings=x: Whether or not indexing options are to be set - -H, --suppressHeader=H: Suppress header - --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' -""" - -import optparse, os, shutil, subprocess, sys, tempfile - -#Allow more than Sanger encoded variants -DEFAULT_ASCII_ENCODING = '--phred33-quals' -GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' } -#FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config - -def stop_err( msg ): - sys.stderr.write( '%s\n' % msg ) - sys.exit() - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' ) - parser.add_option( '-o', '--output', dest='output', help='The output file' ) - parser.add_option( '', '--output_unmapped_reads', 
dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' ) - parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' ) - parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' ) - parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' ) - parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' ) - parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' ) - parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' ) - parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ or FASTA format' ) - - - - parser.add_option( '--filetype', dest='filetype', help='The filetype of your input reads - FASTA (f) or FASTQ (q)' ) - parser.add_option( '--outtype', dest='outtype', help='The filetype of your output (nothing for map or -S for SAM)' ) - - - parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) - parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' ) - parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' ) - parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' ) - parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' ) - parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' ) - parser.add_option( '-T', '--trimH', 
dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' ) - parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' ) - parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' ) - parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' ) - parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' ) - parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' ) - parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' ) - parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' ) - parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' ) - parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' ) - parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' ) - parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" ) - parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' ) - parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' ) - parser.add_option( '-j', '--minInsert', 
dest='minInsert', help='Minimum insert size for valid paired-end alignments' ) - parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' ) - parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' ) - parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' ) - parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' ) - parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' ) - parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' ) - parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' ) - parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' ) - parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' ) - parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' ) - parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' ) - parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' ) - parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' ) - parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' ) - parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' ) - 
parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' ) - parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' ) - parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' ) - parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' ) - parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' ) - parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' ) - parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' ) - parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' ) - parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' ) - parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' ) - parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' ) - parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' ) - (options, args) = parser.parse_args() - if options.mismatchSeed and options.maxMismatches: - parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive") - stdout = '' - - # make temp directory for placement of indices and copy reference file there if necessary - tmp_index_dir = tempfile.mkdtemp() - # get type of data (solid or solexa) - if options.dataType == 'solid': 
- colorspace = '-C' - else: - colorspace = '' - # index if necessary - if options.genomeSource == 'history' and not options.do_not_build_index: - # set up commands - if options.index_settings =='indexPreSet': - indexing_cmds = '%s' % colorspace - else: - try: - if options.iautoB and options.iautoB == 'set': - iautoB = '--noauto' - else: - iautoB = '' - if options.ipacked and options.ipacked == 'packed': - ipacked = '--packed' - else: - ipacked = '' - if options.ibmax and int( options.ibmax ) >= 1: - ibmax = '--bmax %s' % options.ibmax - else: - ibmax = '' - if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0: - ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn - else: - ibmaxdivn = '' - if options.idcv and int( options.idcv ) >= 3: - idcv = '--dcv %s' % options.idcv - else: - idcv = '' - if options.inodc and options.inodc == 'nodc': - inodc = '--nodc' - else: - inodc = '' - if options.inoref and options.inoref == 'noref': - inoref = '--noref' - else: - inoref = '' - if options.iftab and int( options.iftab ) >= 1: - iftab = '--ftabchars %s' % options.iftab - else: - iftab = '' - if options.intoa and options.intoa == 'yes': - intoa = '--ntoa' - else: - intoa = '' - if options.iendian and options.iendian == 'big': - iendian = '--big' - else: - iendian = '--little' - if options.iseed and int( options.iseed ) > 0: - iseed = '--seed %s' % options.iseed - else: - iseed = '' - indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \ - ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, - inoref, options.ioffrate, iftab, intoa, iendian, - iseed, colorspace ) - except ValueError, e: - # clean up temp dir - if os.path.exists( tmp_index_dir ): - shutil.rmtree( tmp_index_dir ) - stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. 
Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) ) - ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir ) - ref_file_name = ref_file.name - ref_file.close() - os.symlink( options.ref, ref_file_name ) - cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name ) - try: - tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - except Exception, e: - # clean up temp dir - if os.path.exists( tmp_index_dir ): - shutil.rmtree( tmp_index_dir ) - stop_err( 'Error indexing reference sequence\n' + str( e ) ) - stdout += 'File indexed. 
' - else: - ref_file_name = options.ref - # set up aligning and generate aligning command options - # automatically set threads in both cases - tmp_suppressed_file_name = None - tmp_unmapped_file_name = None - if options.suppressHeader == 'true': - suppressHeader = '--sam-nohead' - else: - suppressHeader = '' - if options.maxInsert and int( options.maxInsert ) > 0: - maxInsert = '-X %s' % options.maxInsert - else: - maxInsert = '' - if options.mateOrient: - mateOrient = '--%s' % options.mateOrient - else: - mateOrient = '' - quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING ) - if options.params == 'preSet': - aligning_cmds = '-f %s %s -p %s -S %s %s %s ' % \ - ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding ) - - else: - try: - if options.skip and int( options.skip ) > 0: - skip = '-s %s' % options.skip - else: - skip = '' - if options.alignLimit and int( options.alignLimit ) >= 0: - alignLimit = '-u %s' % options.alignLimit - else: - alignLimit = '' - if options.trimH and int( options.trimH ) > 0: - trimH = '-5 %s' % options.trimH - else: - trimH = '' - if options.trimL and int( options.trimL ) > 0: - trimL = '-3 %s' % options.trimL - else: - trimL = '' - if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \ - or options.maxMismatches == '2' or options.maxMismatches == '3'): - maxMismatches = '-v %s' % options.maxMismatches - else: - maxMismatches = '' - if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ - or options.mismatchSeed == '2' or options.mismatchSeed == '3'): - mismatchSeed = '-n %s' % options.mismatchSeed - else: - mismatchSeed = '' - if options.mismatchQual and int( options.mismatchQual ) >= 1: - mismatchQual = '-e %s' % options.mismatchQual - else: - mismatchQual = '' - if options.seedLen and int( options.seedLen ) >= 5: - seedLen = '-l %s' % 
options.seedLen - else: - seedLen = '' - if options.rounding == 'noRound': - rounding = '--nomaqround' - else: - rounding = '' - if options.minInsert and int( options.minInsert ) > 0: - minInsert = '-I %s' % options.minInsert - else: - minInsert = '' - if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0: - maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt - else: - maxAlignAttempt = '' - if options.forwardAlign == 'noForward': - forwardAlign = '--nofw' - else: - forwardAlign = '' - if options.reverseAlign == 'noReverse': - reverseAlign = '--norc' - else: - reverseAlign = '' - if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \ - ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ): - maxBacktracks = '--maxbts %s' % options.maxBacktracks - else: - maxBacktracks = '' - if options.tryHard == 'doTryHard': - tryHard = '-y' - else: - tryHard = '' - if options.valAlign and int( options.valAlign ) >= 0: - valAlign = '-k %s' % options.valAlign - else: - valAlign = '' - if options.allValAligns == 'doAllValAligns': - allValAligns = '-a' - else: - allValAligns = '' - if options.suppressAlign and int( options.suppressAlign ) >= 0: - suppressAlign = '-m %s' % options.suppressAlign - else: - suppressAlign = '' - if options.best == 'doBest': - best = '--best' - else: - best = '' - if options.strata == 'doStrata': - strata = '--strata' - else: - strata = '' - if options.offrate and int( options.offrate ) >= 0: - offrate = '-o %s' % options.offrate - else: - offrate = '' - if options.seed and int( options.seed ) >= 0: - seed = '--seed %s' % options.seed - else: - seed = '' - if options.paired == 'paired': - if options.output_unmapped_reads_l and options.output_unmapped_reads_r: - tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' ) - tmp_unmapped_file_name = tmp_unmapped_file.name - tmp_unmapped_file.close() - output_unmapped_reads = '--un %s' % tmp_unmapped_file_name - else: - output_unmapped_reads 
= '' - if options.output_suppressed_reads: - tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' ) - tmp_suppressed_file_name = tmp_suppressed_file.name - tmp_suppressed_file.close() - output_suppressed_reads = '--max %s' % tmp_suppressed_file_name - else: - output_suppressed_reads = '' - else: - if options.output_unmapped_reads: - output_unmapped_reads = '--un %s' % options.output_unmapped_reads - else: - output_unmapped_reads = '' - if options.output_suppressed_reads: - output_suppressed_reads = '--max %s' % options.output_suppressed_reads - else: - output_suppressed_reads = '' - snpfrac = '' - if options.snpphred and int( options.snpphred ) >= 0: - snpphred = '--snpphred %s' % options.snpphred - else: - snpphred = '' - if options.snpfrac and float( options.snpfrac ) >= 0: - snpfrac = '--snpfrac %s' % options.snpfrac - if options.keepends and options.keepends == 'doKeepends': - keepends = '--col-keepends' - else: - keepends = '' - - - - - - if options.filetype == 'f': - filetype = '-f' - else: - filetype = '-q' - - if options.outtype == 'S': - outtype = '-S' - else: - outtype = '' - - - aligning_cmds = '%s %s %s -p %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \ - '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \ - ( filetype, maxInsert, mateOrient, options.threads, outtype, suppressHeader, - colorspace, skip, alignLimit, trimH, trimL, maxMismatches, - mismatchSeed, mismatchQual, seedLen, rounding, minInsert, - maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks, - tryHard, valAlign, allValAligns, suppressAlign, best, - strata, offrate, seed, snpphred, snpfrac, keepends, - output_unmapped_reads, output_suppressed_reads, - quality_score_encoding ) - - - - - - - except ValueError, e: - # clean up temp dir - if os.path.exists( tmp_index_dir ): - shutil.rmtree( tmp_index_dir ) - stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) - try: - # have to nest 
try-except in try-finally to handle 2.4 - try: - # prepare actual mapping commands - if options.paired == 'paired': - cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output ) - else: - cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output ) - # align - tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - # get suppressed and unmapped reads output files in place if appropriate - if options.paired == 'paired' and tmp_suppressed_file_name and \ - options.output_suppressed_reads_l and options.output_suppressed_reads_r: - try: - left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) - right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) - shutil.move( left, options.output_suppressed_reads_l ) - shutil.move( right, options.output_suppressed_reads_r ) - except Exception, e: - sys.stdout.write( 'Error producing the suppressed output file.\n' ) - if options.paired == 'paired' and tmp_unmapped_file_name and \ - options.output_unmapped_reads_l and options.output_unmapped_reads_r: - try: - left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' ) - right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' ) - shutil.move( left, options.output_unmapped_reads_l ) - shutil.move( right, options.output_unmapped_reads_r ) - except Exception, e: - sys.stdout.write( 'Error producing the unmapped output file.\n' ) - # check that 
there are results in the output file - if os.path.getsize( options.output ) == 0: - raise Exception, 'The output file is empty, there may be an error with your input file or settings.' - except Exception, e: - stop_err( 'Error aligning sequence. ' + str( e ) ) - finally: - # clean up temp dir - if os.path.exists( tmp_index_dir ): - shutil.rmtree( tmp_index_dir ) - stdout += 'Sequence file aligned.\n' - sys.stdout.write( stdout ) - -if __name__ == "__main__": - __main__() diff -r 4901e45ab80d -r 43a534fe1cde enhanced_bowtie_wrapper 1.0.0.xml --- a/enhanced_bowtie_wrapper 1.0.0.xml Sun Aug 07 21:59:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,977 +0,0 @@ - - - bowtie - - - bowtie --version - - bowtie_wrapper.py - ## Set number of threads - --threads="\${GALAXY_SLOTS:-4}" - ## Outputs - - - - - #if "${singlePaired.sParams.outtype}" == "S" - --output="${outputS}" - #else - --output="${outputM}" - #end if - - - - #if str( $singlePaired.sPaired ) == "single" - #if $output_unmapped_reads_l - --output_unmapped_reads="${output_unmapped_reads_l}" - #end if - #if $output_suppressed_reads_l - --output_suppressed_reads="${output_suppressed_reads_l}" - #end if - --galaxy_input_format="${singlePaired.sInput1.ext}" - #else - #if $output_unmapped_reads_l and $output_unmapped_reads_r - --output_unmapped_reads_l="${output_unmapped_reads_l}" - --output_unmapped_reads_r="${output_unmapped_reads_r}" - #end if - #if $output_suppressed_reads_l and $output_suppressed_reads_l - --output_suppressed_reads_l="${output_suppressed_reads_l}" - --output_suppressed_reads_r="${output_suppressed_reads_r}" - #end if - --galaxy_input_format="${singlePaired.pInput1.ext}" - #end if - ## Inputs - --dataType="solexa" ##this indicates that nucleotide base space is used in the wrapper - --suppressHeader="${suppressHeader}" - --genomeSource="${refGenomeSource.genomeSource}" - #if $refGenomeSource.genomeSource == "history": - ##index already exists - #if 
$refGenomeSource.ownFile.extension.startswith( 'bowtie_' ): - ##user previously built - --ref="${refGenomeSource.ownFile.extra_files_path}/${refGenomeSource.ownFile.metadata.base_name}" - --do_not_build_index - #else: - ##build index on the fly - --ref="${refGenomeSource.ownFile}" - --indexSettings="${refGenomeSource.indexParams.indexSettings}" - #if $refGenomeSource.indexParams.indexSettings == "indexFull": - --iautoB="${refGenomeSource.indexParams.autoBehavior.autoB}" - #if $refGenomeSource.indexParams.autoBehavior.autoB == "set": - --ipacked="${refGenomeSource.indexParams.autoBehavior.packed}" - --ibmax="${refGenomeSource.indexParams.autoBehavior.bmax}" - --ibmaxdivn="${refGenomeSource.indexParams.autoBehavior.bmaxdivn}" - --idcv="${refGenomeSource.indexParams.autoBehavior.dcv}" - #end if - --inodc="${refGenomeSource.indexParams.nodc}" - --inoref="${refGenomeSource.indexParams.noref}" - --ioffrate="${refGenomeSource.indexParams.offrate}" - --iftab="${refGenomeSource.indexParams.ftab}" - --intoa="${refGenomeSource.indexParams.ntoa}" - --iendian="${refGenomeSource.indexParams.endian}" - --iseed="${refGenomeSource.indexParams.seed}" - #end if - #end if - #else - ##use pre-built index - --ref="${refGenomeSource.index.fields.path}" - #end if - --paired="${singlePaired.sPaired}" - #if $singlePaired.sPaired == "single": - - - - - - --filetype="${singlePaired.sParams.filetype}" - --outtype="${singlePaired.sParams.outtype}" - - - - --input1="${singlePaired.sInput1}" - --params="${singlePaired.sParams.sSettingsType}" - #if $singlePaired.sParams.sSettingsType == "full": - --skip="${singlePaired.sParams.sSkip}" - --alignLimit="${singlePaired.sParams.sAlignLimit}" - --trimH="${singlePaired.sParams.sTrimH}" - --trimL="${singlePaired.sParams.sTrimL}" - #if $singlePaired.sParams.alignModeOption.alignMode == 'nMode' - --mismatchSeed="${singlePaired.sParams.alignModeOption.sMismatchSeed}" - --mismatchQual="${singlePaired.sParams.alignModeOption.sMismatchQual}" - 
--seedLen="${singlePaired.sParams.alignModeOption.sSeedLen}" - --rounding="${singlePaired.sParams.alignModeOption.sRounding}" - #else - --maxMismatches="${singlePaired.sParams.alignModeOption.maxMismatches}" - #end if - --forwardAlign="${singlePaired.sParams.sForwardAlign}" - --reverseAlign="${singlePaired.sParams.sReverseAlign}" - --tryHard="${singlePaired.sParams.sBestOption.sTryHardOption.sTryHard}" - --allValAligns="${singlePaired.sParams.sAllValAlignsOption.sAllValAligns}" - #if $singlePaired.sParams.sAllValAlignsOption.sAllValAligns == "noAllValAligns" - --valAlign="${singlePaired.sParams.sAllValAlignsOption.sValAlign}" - #end if - --suppressAlign="${singlePaired.sParams.sSuppressAlign}" - --best="${singlePaired.sParams.sBestOption.sBest}" - #if $singlePaired.sParams.sBestOption.sBest == "doBest": - --strata="${singlePaired.sParams.sBestOption.sdStrata}" - #if $singlePaired.sParams.sBestOption.sTryHardOption.sTryHard == "noTryHard" - --maxBacktracks="${singlePaired.sParams.sBestOption.sTryHardOption.sdMaxBacktracks}" - #end if - #else: - #if $singlePaired.sParams.sBestOption.sTryHardOption.sTryHard == "noTryHard" - --maxBacktracks="${singlePaired.sParams.sBestOption.sTryHardOption.snMaxBacktracks}" - #end if - #end if - --offrate="${singlePaired.sParams.sOffrate}" - --seed="${singlePaired.sParams.sSeed}" - #end if - #else: - --input1="${singlePaired.pInput1}" - --input2="${singlePaired.pInput2}" - --maxInsert="${singlePaired.pMaxInsert}" - --mateOrient="${singlePaired.pMateOrient}" - --params="${singlePaired.pParams.pSettingsType}" - #if $singlePaired.pParams.pSettingsType == "full": - --skip="${singlePaired.pParams.pSkip}" - --alignLimit="${singlePaired.pParams.pAlignLimit}" - --trimH="${singlePaired.pParams.pTrimH}" - --trimL="${singlePaired.pParams.pTrimL}" - #if $singlePaired.pParams.alignModeOption.alignMode == 'nMode' - --mismatchSeed="${singlePaired.pParams.alignModeOption.pMismatchSeed}" - 
--mismatchQual="${singlePaired.pParams.alignModeOption.pMismatchQual}" - --seedLen="${singlePaired.pParams.alignModeOption.pSeedLen}" - --rounding="${singlePaired.pParams.alignModeOption.pRounding}" - #else - --maxMismatches="${singlePaired.pParams.alignModeOption.maxMismatches}" - #end if - --minInsert="${singlePaired.pParams.pMinInsert}" - --forwardAlign="${singlePaired.pParams.pForwardAlign}" - --reverseAlign="${singlePaired.pParams.pReverseAlign}" - --tryHard="${singlePaired.pParams.pBestOption.pTryHardOption.pTryHard}" - --allValAligns="${singlePaired.pParams.pAllValAlignsOption.pAllValAligns}" - #if $singlePaired.pParams.pAllValAlignsOption.pAllValAligns == "noAllValAligns" - --valAlign="${singlePaired.pParams.pAllValAlignsOption.pValAlign}" - #end if - --suppressAlign="${singlePaired.pParams.pSuppressAlign}" - --best="${singlePaired.pParams.pBestOption.pBest}" - #if $singlePaired.pParams.pBestOption.pBest == "doBest": - --strata="${singlePaired.pParams.pBestOption.pdStrata}" - #if $singlePaired.pParams.pBestOption.pTryHardOption.pTryHard == "noTryHard" - --maxAlignAttempt="${singlePaired.pParams.pBestOption.pTryHardOption.pMaxAlignAttempt}" - --maxBacktracks="${singlePaired.pParams.pBestOption.pTryHardOption.pdMaxBacktracks}" - #end if - #else: - #if $singlePaired.pParams.pBestOption.pTryHardOption.pTryHard == "noTryHard" - --maxAlignAttempt="${singlePaired.pParams.pBestOption.pTryHardOption.pMaxAlignAttempt}" - --maxBacktracks="${singlePaired.pParams.pBestOption.pTryHardOption.pnMaxBacktracks}" - #end if - #end if - --offrate="${singlePaired.pParams.pOffrate}" - --seed="${singlePaired.pParams.pSeed}" - #end if - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - > - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (( - singlePaired['sPaired'] == "single" and - singlePaired['sParams']['sSettingsType'] == "full" and - singlePaired['sParams']['sMaxFile'] is True - ) or ( - singlePaired['sPaired'] == "paired" and - singlePaired['pParams']['pSettingsType'] == "full" and - singlePaired['pParams']['pMaxFile'] is True - )) - - - - - - - - - - - - - - - - singlePaired['sPaired'] == "paired" - singlePaired['pParams']['pSettingsType'] == "full" - singlePaired['pParams']['pMaxFile'] is True - - - - - - - - - - - - - - - - (( - singlePaired['sPaired'] == "single" and - singlePaired['sParams']['sSettingsType'] == "full" and - singlePaired['sParams']['sUnmappedFile'] is True - ) or ( - singlePaired['sPaired'] == "paired" and - singlePaired['pParams']['pSettingsType'] == "full" and - singlePaired['pParams']['pUnmappedFile'] is True - )) - - - - - - - - - - - - - - - - singlePaired['sPaired'] == "paired" - singlePaired['pParams']['pSettingsType'] == "full" - singlePaired['pParams']['pUnmappedFile'] is True - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient. It is developed by Ben Langmead and Cole Trapnell. Please cite: Langmead B, Trapnell C, Pop M, Salzberg SL. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. 
Genome Biology 10:R25. - -.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml - ------- - -**Know what you are doing** - -.. class:: warningmark - -There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. - - .. __: http://bowtie-bio.sourceforge.net/index.shtml - ------- - -**Input formats** - -Bowtie accepts files in Sanger FASTQ or FASTA format. - ------- - -**A Note on Built-in Reference Genomes** - -The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example, a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.
- ------- - -**Outputs** - -The output is in SAM format, and has the following columns:: - - Column Description - -------- -------------------------------------------------------- - 1 QNAME Query (pair) NAME - 2 FLAG bitwise FLAG - 3 RNAME Reference sequence NAME - 4 POS 1-based leftmost POSition/coordinate of clipped sequence - 5 MAPQ MAPping Quality (Phred-scaled) - 6 CIGAR extended CIGAR string - 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) - 8 MPOS 1-based Mate POSition - 9 ISIZE Inferred insert SIZE - 10 SEQ query SEQuence on the same strand as the reference - 11 QUAL query QUALity (ASCII-33 gives the Phred base quality) - 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE - -The flags are as follows:: - - Flag Description - ------ ------------------------------------- - 0x0001 the read is paired in sequencing - 0x0002 the read is mapped in a proper pair - 0x0004 the query sequence itself is unmapped - 0x0008 the mate is unmapped - 0x0010 strand of the query (1 for reverse) - 0x0020 strand of the mate - 0x0040 the read is the first read in a pair - 0x0080 the read is the second read in a pair - 0x0100 the alignment is not primary - -It looks like this (scroll sideways to see the entire example):: - - QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT - HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh - HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh - -------- - -**Bowtie settings** - -All of the options have a default value. You can change any of them. Most of the options in Bowtie have been implemented here.
- ------- - -**Bowtie parameter list** - -This is an exhaustive list of Bowtie options: - -For indexing (bowtie-build):: - - -a No auto behavior. Disable the default behavior where bowtie automatically - selects values for --bmax/--bmaxdivn/--dcv/--packed parameters according - to the memory available. [off] - --packed Packing. Use a packed representation for DNA strings. [auto] - --bmax INT Suffix maximum. The maximum number of suffixes allowed in a block. [auto] - --bmaxdivn INT Suffix maximum fraction. The maximum number of suffixes allowed in a block - expressed as a fraction of the length of the reference. [4] - --dcv INT Difference-cover sample. Use INT as the period for the difference-cover - sample. [1024] - --nodc No difference-cover sample. Disable the difference-cover sample. [off] - -r No reference indexes. Do not build the NAME.3.ebwt and NAME.4.ebwt portions - of the index. Used only for paired-end alignment. [off] - -o INT Offrate. How many Burrows-Wheeler rows get marked by the indexer. The - indexer will mark every 2^INT rows. The marked rows correspond to rows on - the genome. [5] - -t INT The ftab lookup table used to calculate an initial Burrows-Wheeler range - with respect to the first INT characters of the query. Ftab size is 4^(INT+1) - bytes. [10] - --ntoa N conversion. Convert Ns to As before building the index. Otherwise, Ns are - simply excluded from the index and Bowtie will not find alignments that - overlap them. [off] - --big Endianness. Endianness to use when serializing integers to the index file. [off] - --little Endianness. [--little] - --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] - -For aligning (bowtie):: - - -s INT Skip. Do not align the first INT reads or pairs in the input. [off] - -u INT Align limit. Only align the first INT reads/pairs from the input. [no limit] - -5 INT High-quality trim. Trim INT bases from the high-quality (left) end of each - read before alignment.
[0] - -3 INT Low-quality trim. Trim INT bases from the low-quality (right) end of each - read before alignment. [0] - -n INT Mismatch seed. Maximum number of mismatches permitted in the seed (defined - with seed length option). Can be 0, 1, 2, or 3. [2] - -e INT Mismatch quality. Maximum permitted total of quality values at mismatched - read positions. Bowtie rounds quality values to the nearest 10 and saturates - at 30. [70] - -l INT Seed length. The number of bases on the high-quality end of the read to - which the -n ceiling applies. Must be at least 5. [28] - --nomaqround Suppress Maq rounding. Values are internally rounded to the nearest 10 and - saturate at 30. This option turns off that rounding. [off] - -v INT Maq- or SOAP-like alignment policy. This option turns off the default - Maq-like alignment policy in favor of a SOAP-like one. Reports end-to-end alignments - with at most INT mismatches. [off] - -I INT Minimum insert. The minimum insert size for valid paired-end alignments. - Does checking on untrimmed reads if -5 or -3 is used. [0] - -X INT Maximum insert. The maximum insert size for valid paired-end alignments. - Does checking on untrimmed reads if -5 or -3 is used. [250] - --fr Mate orientation. The upstream/downstream mate orientations for a valid - paired-end alignment against the forward reference strand. [--fr] - --rf Mate orientation. [off] - --ff Mate orientation. [off] - --pairtries INT Maximum alignment attempts for paired-end data. [100] - --nofw No forward aligning. Choosing this option means that Bowtie will not attempt - to align against the forward reference strand. [off] - --norc No reverse-complement aligning. Setting this will mean that Bowtie will not - attempt to align against the reverse-complement reference strand.
[off] - --un FILENAME Write all reads that could not be aligned to file [off] - --max FILENAME Write all reads with a number of valid alignments exceeding the limit - set with the -m option to file [off] - --maxbts INT Maximum backtracks. The maximum number of backtracks permitted when aligning - a read in -n 2 or -n 3 mode. [125 without --best] [800 with --best] - -y Try hard. Try as hard as possible to find valid alignments when they exist, - including paired-end alignments. [off] - --chunkmbs INT Thread memory. The number of megabytes of memory a given thread is given to - store path descriptors in --best mode. [32] - -k INT Valid alignments. Report up to INT valid alignments per read or pair. [off] - -a All valid alignments. Choosing this means that all valid alignments per read - or pair will be reported. [off] - -m INT Suppress alignments. Suppress all alignments for a particular read or pair - if more than INT reportable alignments exist for it. [no limit] - --best Best mode. Make Bowtie guarantee that reported singleton alignments are - "best" in terms of stratum (the number of mismatches) and quality values at - mismatched positions. [off] - --strata Best strata. When running in best mode, report only those alignments that fall into the - best stratum if valid alignments fall into more than one stratum. [off] - -o INT Offrate override. Override the offrate of the index with INT. Some row - markings are discarded when the index is read into memory. INT must be greater than - the value used to build the index (default: 5). [off] - --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] - --snpphred INT Use INT as the SNP penalty for decoding colorspace alignments. INT should reflect the true ratio of - SNPs per base in the subject genome, expressed on the Phred scale. [see --snpfrac] - --snpfrac DEC Use DEC as the estimated ratio of SNPs per base when decoding colorspace - alignments. [0.001] - --col-keepends Keep the extreme-end nucleotides and qualities when decoding colorspace - alignments.
[off] - - - diff -r 4901e45ab80d -r 43a534fe1cde tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Sun Aug 07 21:59:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ - - - - - value, dbkey, name, path - -
-
diff -r 4901e45ab80d -r 43a534fe1cde tool_dependencies.xml --- a/tool_dependencies.xml Sun Aug 07 21:59:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - -