# HG changeset patch # User kaymccoy # Date 1470953741 14400 # Node ID a17d4f682e3f2613ca98451d1364724590f66edb # Parent 52da86fd981a41bd69cdc5750bef41a7f7185f3d Uploaded diff -r 52da86fd981a -r a17d4f682e3f enhanced_bowtie_mapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/enhanced_bowtie_mapper.xml Thu Aug 11 18:15:41 2016 -0400 @@ -0,0 +1,989 @@ + + + bowtie + + + bowtie --version + + enhanced_bowtie_wrapper.py + ## Set number of threads + --threads="\${GALAXY_SLOTS:-4}" + ## Outputs + + + + + #if "${singlePaired.sParams.outtype}" == "M" + --output="${outputM}" + #else + --output="${outputS}" + #end if + + + + #if str( $singlePaired.sPaired ) == "single" + #if $output_unmapped_reads_l + --output_unmapped_reads="${output_unmapped_reads_l}" + #end if + #if $output_suppressed_reads_l + --output_suppressed_reads="${output_suppressed_reads_l}" + #end if + --galaxy_input_format="${singlePaired.sInput1.ext}" + #else + #if $output_unmapped_reads_l and $output_unmapped_reads_r + --output_unmapped_reads_l="${output_unmapped_reads_l}" + --output_unmapped_reads_r="${output_unmapped_reads_r}" + #end if + #if $output_suppressed_reads_l and $output_suppressed_reads_l + --output_suppressed_reads_l="${output_suppressed_reads_l}" + --output_suppressed_reads_r="${output_suppressed_reads_r}" + #end if + --galaxy_input_format="${singlePaired.pInput1.ext}" + #end if + ## Inputs + --dataType="solexa" ##this indicates that nucleotide base space is used in the wrapper + --suppressHeader="${suppressHeader}" + --genomeSource="${refGenomeSource.genomeSource}" + #if $refGenomeSource.genomeSource == "history": + ##index already exists + #if $refGenomeSource.ownFile.extension.startswith( 'bowtie_' ): + ##user previously built + --ref="${refGenomeSource.ownFile.extra_files_path}/${refGenomeSource.ownFile.metadata.base_name}" + --do_not_build_index + #else: + ##build index on the fly + --ref="${refGenomeSource.ownFile}" + --indexSettings="${refGenomeSource.indexParams.indexSettings}" 
+ #if $refGenomeSource.indexParams.indexSettings == "indexFull": + --iautoB="${refGenomeSource.indexParams.autoBehavior.autoB}" + #if $refGenomeSource.indexParams.autoBehavior.autoB == "set": + --ipacked="${refGenomeSource.indexParams.autoBehavior.packed}" + --ibmax="${refGenomeSource.indexParams.autoBehavior.bmax}" + --ibmaxdivn="${refGenomeSource.indexParams.autoBehavior.bmaxdivn}" + --idcv="${refGenomeSource.indexParams.autoBehavior.dcv}" + #end if + --inodc="${refGenomeSource.indexParams.nodc}" + --inoref="${refGenomeSource.indexParams.noref}" + --ioffrate="${refGenomeSource.indexParams.offrate}" + --iftab="${refGenomeSource.indexParams.ftab}" + --intoa="${refGenomeSource.indexParams.ntoa}" + --iendian="${refGenomeSource.indexParams.endian}" + --iseed="${refGenomeSource.indexParams.seed}" + #end if + #end if + #else + ##use pre-built index + --ref="${refGenomeSource.index.fields.path}" + #end if + --paired="${singlePaired.sPaired}" + #if $singlePaired.sPaired == "single": + + + + + #if $singlePaired.sParams.sSettingsType == "full": + --filetype="${singlePaired.sParams.filetype}" + #else + --filetype="q" + #end if + + #if $singlePaired.sParams.sSettingsType == "full": + --outtype="${singlePaired.sParams.outtype}" + #else + --outtype="S" + #end if + + + + + + --input1="${singlePaired.sInput1}" + --params="${singlePaired.sParams.sSettingsType}" + #if $singlePaired.sParams.sSettingsType == "full": + --skip="${singlePaired.sParams.sSkip}" + --alignLimit="${singlePaired.sParams.sAlignLimit}" + --trimH="${singlePaired.sParams.sTrimH}" + --trimL="${singlePaired.sParams.sTrimL}" + #if $singlePaired.sParams.alignModeOption.alignMode == 'nMode' + --mismatchSeed="${singlePaired.sParams.alignModeOption.sMismatchSeed}" + --mismatchQual="${singlePaired.sParams.alignModeOption.sMismatchQual}" + --seedLen="${singlePaired.sParams.alignModeOption.sSeedLen}" + --rounding="${singlePaired.sParams.alignModeOption.sRounding}" + #else + 
--maxMismatches="${singlePaired.sParams.alignModeOption.maxMismatches}" + #end if + --forwardAlign="${singlePaired.sParams.sForwardAlign}" + --reverseAlign="${singlePaired.sParams.sReverseAlign}" + --tryHard="${singlePaired.sParams.sBestOption.sTryHardOption.sTryHard}" + --allValAligns="${singlePaired.sParams.sAllValAlignsOption.sAllValAligns}" + #if $singlePaired.sParams.sAllValAlignsOption.sAllValAligns == "noAllValAligns" + --valAlign="${singlePaired.sParams.sAllValAlignsOption.sValAlign}" + #end if + --suppressAlign="${singlePaired.sParams.sSuppressAlign}" + --best="${singlePaired.sParams.sBestOption.sBest}" + #if $singlePaired.sParams.sBestOption.sBest == "doBest": + --strata="${singlePaired.sParams.sBestOption.sdStrata}" + #if $singlePaired.sParams.sBestOption.sTryHardOption.sTryHard == "noTryHard" + --maxBacktracks="${singlePaired.sParams.sBestOption.sTryHardOption.sdMaxBacktracks}" + #end if + #else: + #if $singlePaired.sParams.sBestOption.sTryHardOption.sTryHard == "noTryHard" + --maxBacktracks="${singlePaired.sParams.sBestOption.sTryHardOption.snMaxBacktracks}" + #end if + #end if + --offrate="${singlePaired.sParams.sOffrate}" + --seed="${singlePaired.sParams.sSeed}" + #end if + #else: + --input1="${singlePaired.pInput1}" + --input2="${singlePaired.pInput2}" + --maxInsert="${singlePaired.pMaxInsert}" + --mateOrient="${singlePaired.pMateOrient}" + --params="${singlePaired.pParams.pSettingsType}" + #if $singlePaired.pParams.pSettingsType == "full": + --skip="${singlePaired.pParams.pSkip}" + --alignLimit="${singlePaired.pParams.pAlignLimit}" + --trimH="${singlePaired.pParams.pTrimH}" + --trimL="${singlePaired.pParams.pTrimL}" + #if $singlePaired.pParams.alignModeOption.alignMode == 'nMode' + --mismatchSeed="${singlePaired.pParams.alignModeOption.pMismatchSeed}" + --mismatchQual="${singlePaired.pParams.alignModeOption.pMismatchQual}" + --seedLen="${singlePaired.pParams.alignModeOption.pSeedLen}" + --rounding="${singlePaired.pParams.alignModeOption.pRounding}" 
+ #else + --maxMismatches="${singlePaired.pParams.alignModeOption.maxMismatches}" + #end if + --minInsert="${singlePaired.pParams.pMinInsert}" + --forwardAlign="${singlePaired.pParams.pForwardAlign}" + --reverseAlign="${singlePaired.pParams.pReverseAlign}" + --tryHard="${singlePaired.pParams.pBestOption.pTryHardOption.pTryHard}" + --allValAligns="${singlePaired.pParams.pAllValAlignsOption.pAllValAligns}" + #if $singlePaired.pParams.pAllValAlignsOption.pAllValAligns == "noAllValAligns" + --valAlign="${singlePaired.pParams.pAllValAlignsOption.pValAlign}" + #end if + --suppressAlign="${singlePaired.pParams.pSuppressAlign}" + --best="${singlePaired.pParams.pBestOption.pBest}" + #if $singlePaired.pParams.pBestOption.pBest == "doBest": + --strata="${singlePaired.pParams.pBestOption.pdStrata}" + #if $singlePaired.pParams.pBestOption.pTryHardOption.pTryHard == "noTryHard" + --maxAlignAttempt="${singlePaired.pParams.pBestOption.pTryHardOption.pMaxAlignAttempt}" + --maxBacktracks="${singlePaired.pParams.pBestOption.pTryHardOption.pdMaxBacktracks}" + #end if + #else: + #if $singlePaired.pParams.pBestOption.pTryHardOption.pTryHard == "noTryHard" + --maxAlignAttempt="${singlePaired.pParams.pBestOption.pTryHardOption.pMaxAlignAttempt}" + --maxBacktracks="${singlePaired.pParams.pBestOption.pTryHardOption.pnMaxBacktracks}" + #end if + #end if + --offrate="${singlePaired.pParams.pOffrate}" + --seed="${singlePaired.pParams.pSeed}" + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + > + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (( + singlePaired['sPaired'] == "single" and + singlePaired['sParams']['sSettingsType'] == "full" and + singlePaired['sParams']['sMaxFile'] is True + ) or ( + singlePaired['sPaired'] == "paired" and + singlePaired['pParams']['pSettingsType'] == "full" and + singlePaired['pParams']['pMaxFile'] is True + )) + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + singlePaired['pParams']['pSettingsType'] == "full" + singlePaired['pParams']['pMaxFile'] is True + + + + + + + + + + + + + + + + (( + singlePaired['sPaired'] == "single" and + singlePaired['sParams']['sSettingsType'] == "full" and + singlePaired['sParams']['sUnmappedFile'] is True + ) or ( + singlePaired['sPaired'] == "paired" and + singlePaired['pParams']['pSettingsType'] == "full" and + singlePaired['pParams']['pUnmappedFile'] is True + )) + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + singlePaired['pParams']['pSettingsType'] == "full" + singlePaired['pParams']['pUnmappedFile'] is True + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient. It is developed by Ben Langmead and Cole Trapnell. Please cite: Langmead B, Trapnell C, Pop M, Salzberg SL. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biology 10:R25. + +.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml + +------ + +**Know what you are doing** + +.. 
class:: warningmark + +There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + + .. __: http://bowtie-bio.sourceforge.net/index.shtml + +------ + +**Input formats** + +Bowtie accepts files in Sanger FASTQ or FASTA format. + +------ + +**A Note on Built-in Reference Genomes** + +The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example, a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY. 
+ +------ + +**Outputs** + +If the output is in SAM format, it has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 QNAME Query (pair) NAME + 2 FLAG bitwise FLAG + 3 RNAME Reference sequence NAME + 4 POS 1-based leftmost POSition/coordinate of clipped sequence + 5 MAPQ MAPping Quality (Phred-scaled) + 6 CIGAR extended CIGAR string + 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS 1-based Mate POSition + 9 ISIZE Inferred insert SIZE + 10 SEQ query SEQuence on the same strand as the reference + 11 QUAL query QUALity (ASCII-33 gives the Phred base quality) + 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE + +Otherwise it's in the default MAP format. + +The flags are as follows:: + + Flag Description + ------ ------------------------------------- + 0x0001 the read is paired in sequencing + 0x0002 the read is mapped in a proper pair + 0x0004 the query sequence itself is unmapped + 0x0008 the mate is unmapped + 0x0010 strand of the query (1 for reverse) + 0x0020 strand of the mate + 0x0040 the read is the first read in a pair + 0x0080 the read is the second read in a pair + 0x0100 the alignment is not primary + +It looks like this (scroll sideways to see the entire example):: + + QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT + HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh + HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh + +------- + +**Bowtie settings** + +All of the options have a default value. You can change any of them. Most of the options in Bowtie have been implemented here. 
+ +------ + +**Bowtie parameter list** + +This is an exhaustive list of Bowtie options: + +For indexing (bowtie-build):: + + -a No auto behavior. Disable the default behavior where bowtie automatically + selects values for --bmax/--bmaxdivn/--dcv/--packed parameters according + to the memory available. [off] + --packed Packing. Use a packed representation for DNA strings. [auto] + --bmax INT Suffix maximum. The maximum number of suffixes allowed in a block. [auto] + --bmaxdivn INT Suffix maximum fraction. The maximum number of suffixes allowed in a block + expressed as a fraction of the length of the reference. [4] + --dcv INT Difference-cover sample. Use INT as the period for the difference-cover + sample. [1024] + --nodc No difference-cover sample. Disable the difference-cover sample. [off] + -r No reference indexes. Do not build the NAME.3.ebwt and NAME.4.ebwt portions + of the index. Used only for paired-end alignment. [off] + -o INT Offrate. How many Burrows-Wheeler rows get marked by the indexer. The + indexer will mark every 2^INT rows. The marked rows correspond to rows on + the genome. [5] + -t INT The ftab lookup table used to calculate an initial Burrows-Wheeler range + with respect to the first INT characters of the query. Ftab size is 4^(INT+1) + bytes. [10] + --ntoa N conversion. Convert Ns to As before building the index. Otherwise, Ns are + simply excluded from the index and Bowtie will not find alignments that + overlap them. [off] + --big Endianness. Endianness to use when serializing integers to the index file. [off] + --little Endianness. [--little] + --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] + +For aligning (bowtie):: + + -s INT Skip. Do not align the first INT reads or pairs in the input. [off] + -u INT Align limit. Only align the first INT reads/pairs from the input. [no limit] + -5 INT High-quality trim. Trim INT bases from the high-quality (left) end of each + read before alignment. 
[0] + -3 INT Low-quality trim. Trim INT bases from the low-quality (right) end of each + read before alignment. [0] + -n INT Mismatch seed. Maximum number of mismatches permitted in the seed (defined + with seed length option). Can be 0, 1, 2, or 3. [2] + -e INT Mismatch quality. Maximum permitted total of quality values at mismatched + read positions. Bowtie rounds quality values to the nearest 10 and saturates + at 30. [70] + -l INT Seed length. The number of bases on the high-quality end of the read to + which the -n ceiling applies. Must be at least 5. [28] + --nomaqround Suppress Maq rounding. Values are internally rounded to the nearest 10 and + saturate at 30. This option turns off that rounding. [off] + -v INT Maq- or SOAP-like alignment policy. This option turns off the default + Maq-like alignment policy in favor of a SOAP-like one. End-to-end alignments + with at most INT mismatches. [off] + -I INT Minimum insert. The minimum insert size for valid paired-end alignments. + Does checking on untrimmed reads if -5 or -3 is used. [0] + -X INT Maximum insert. The maximum insert size for valid paired-end alignments. + Does checking on untrimmed reads if -5 or -3 is used. [250] + --fr Mate orientation. The upstream/downstream mate orientations for a valid + paired-end alignment against the forward reference strand. [--fr] + --rf Mate orientation. [off] + --ff Mate orientation. [off] + --pairtries INT Maximum alignment attempts for paired-end data. [100] + --nofw No forward aligning. Choosing this option means that Bowtie will not attempt + to align against the forward reference strand. [off] + --norc No reverse-complement aligning. Setting this will mean that Bowtie will not + attempt to align against the reverse-complement reference strand. 
[off] + --un FILENAME Write all reads that could not be aligned to file [off] + --max FILENAME Write all reads with a number of valid alignments exceeding the limit + set with the -m option to file [off] + --maxbts INT Maximum backtracks. The maximum number of backtracks permitted when aligning + a read in -n 2 or -n 3 mode. [125 without --best] [800 with --best] + -y Try hard. Try as hard as possible to find valid alignments when they exist, + including paired-end alignments. [off] + --chunkmbs INT Thread memory. The number of megabytes of memory a given thread is given to + store path descriptors in --best mode. [32] + -k INT Valid alignments. The number of valid alignments per read or pair. [off] + -a All valid alignments. Choosing this means that all valid alignments per read + or pair will be reported. [off] + -m INT Suppress alignments. Suppress all alignments for a particular read or pair + if more than INT reportable alignments exist for it. [no limit] + --best Best mode. Make Bowtie guarantee that reported singleton alignments are + "best" in terms of stratum (the number of mismatches) and quality values at + mismatched positions. [off] + --strata Best strata. When running in best mode, report alignments that fall into the + best stratum if there are ones falling into more than one. [off] + -o INT Offrate override. Override the offrate of the index with INT. Some row + markings are discarded when the index is read into memory. INT must be greater than + the value used to build the index (default: 5). [off] + --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] + --snpphred INT Use INT as the SNP penalty for decoding colorspace alignments. True ratio of + SNPs per base in the subject genome. [see --snpfrac] + --snpfrac DEC Use DEC as the estimated ratio of SNPs per base when decoding colorspace + alignments. [0.001] + --col-keepends Keep the extreme-end nucleotides and qualities when decoding colorspace + alignments. [off] + + + 