Mercurial > repos > devteam > bowtie_color_wrappers
changeset 0:fd0914e451c5 draft
Uploaded tarball
author | devteam |
---|---|
date | Mon, 26 Nov 2012 09:47:13 -0500 |
parents | |
children | 2506bd84cc54 |
files | bowtie_color_wrapper.xml bowtie_wrapper.py test-data/bowtie_in1.fastqcssanger test-data/bowtie_in3.fastqcssanger test-data/bowtie_in4.fastqcssanger test-data/bowtie_out1.sam test-data/bowtie_out2.sam test-data/bowtie_out3_1.fastq test-data/bowtie_out3_2.fastq test-data/bowtie_out4.sam test-data/bowtie_out5.sam test-data/chr_m.fasta tool-data/bowtie_indices_color.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 15 files changed, 1665 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bowtie_color_wrapper.xml Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,716 @@ +<tool id="bowtie_color_wrapper" name="Map with Bowtie for SOLiD" version="1.1.2"> + <requirements> + <requirement type='package' version="0.12.7">bowtie</requirement> + </requirements> + <description></description> + <command interpreter="python"> + bowtie_wrapper.py + ## Hackish setting of number of threads + --threads="4" + ## Outputs (paired-end: the L/R files are passed only when both datasets are set) + --output=$output + #if str( $singlePaired.sPaired ) == "single" + #if $output_unmapped_reads_l + --output_unmapped_reads=$output_unmapped_reads_l + #end if + #if $output_suppressed_reads_l + --output_suppressed_reads=$output_suppressed_reads_l + #end if + #else + #if $output_unmapped_reads_l and $output_unmapped_reads_r + --output_unmapped_reads_l=$output_unmapped_reads_l + --output_unmapped_reads_r=$output_unmapped_reads_r + #end if + #if $output_suppressed_reads_l and $output_suppressed_reads_r + --output_suppressed_reads_l=$output_suppressed_reads_l + --output_suppressed_reads_r=$output_suppressed_reads_r + #end if + #end if + ## Inputs + --dataType="solid" + --suppressHeader=$suppressHeader + --genomeSource=$refGenomeSource.genomeSource + #if $refGenomeSource.genomeSource == "history": + ##index already exists + #if $refGenomeSource.ownFile.extension.startswith( 'bowtie_' ): + ##user previously built + --ref="${refGenomeSource.ownFile.extra_files_path}/${refGenomeSource.ownFile.metadata.base_name}" + --do_not_build_index + #else: + ##build index on the fly + --ref=$refGenomeSource.ownFile + --indexSettings=$refGenomeSource.indexParams.indexSettings + #if $refGenomeSource.indexParams.indexSettings == "indexFull": + --iautoB=$refGenomeSource.indexParams.autoBehavior.autoB + #if $refGenomeSource.indexParams.autoBehavior.autoB == "set": + --ipacked=$refGenomeSource.indexParams.autoBehavior.packed + --ibmax=$refGenomeSource.indexParams.autoBehavior.bmax +
--ibmaxdivn=$refGenomeSource.indexParams.autoBehavior.bmaxdivn + --idcv=$refGenomeSource.indexParams.autoBehavior.dcv + #end if + --inodc=$refGenomeSource.indexParams.nodc + --inoref=$refGenomeSource.indexParams.noref + --ioffrate=$refGenomeSource.indexParams.offrate + --iftab=$refGenomeSource.indexParams.ftab + --intoa=$refGenomeSource.indexParams.ntoa + --iendian=$refGenomeSource.indexParams.endian + --iseed=$refGenomeSource.indexParams.seed + --icutoff=$refGenomeSource.indexParams.cutoff + #end if + #end if + #else + ##use pre-built index + --ref="${refGenomeSource.index.fields.path}" + #end if + --paired=$singlePaired.sPaired + #if $singlePaired.sPaired == "single": + --input1=$singlePaired.sInput1 + --params=$singlePaired.sParams.sSettingsType + #if $singlePaired.sParams.sSettingsType == "full": + --skip=$singlePaired.sParams.sSkip + --alignLimit=$singlePaired.sParams.sAlignLimit + --trimH=$singlePaired.sParams.sTrimH + --trimL=$singlePaired.sParams.sTrimL + --mismatchSeed=$singlePaired.sParams.sMismatchSeed + --mismatchQual=$singlePaired.sParams.sMismatchQual + --seedLen=$singlePaired.sParams.sSeedLen + --rounding=$singlePaired.sParams.sRounding + --maqSoapAlign=$singlePaired.sParams.sMaqSoapAlign + --tryHard=$singlePaired.sParams.sTryHard + --valAlign=$singlePaired.sParams.sValAlign + --allValAligns=$singlePaired.sParams.sAllValAligns + --suppressAlign=$singlePaired.sParams.sSuppressAlign + --best=$singlePaired.sParams.sBestOption.sBest + #if $singlePaired.sParams.sBestOption.sBest == "doBest": + --maxBacktracks=$singlePaired.sParams.sBestOption.sdMaxBacktracks + --strata=$singlePaired.sParams.sBestOption.sdStrata + #else: + --maxBacktracks=$singlePaired.sParams.sBestOption.snMaxBacktracks + #end if + --offrate=$singlePaired.sParams.sOffrate + --seed=$singlePaired.sParams.sSeed + --snpphred=$singlePaired.sParams.sSnpphred + --snpfrac=$singlePaired.sParams.sSnpfrac + --keepends=$singlePaired.sParams.sKeepends + #end if + #else: + 
--input1=$singlePaired.pInput1 + --input2=$singlePaired.pInput2 + --maxInsert=$singlePaired.pMaxInsert + --mateOrient=$singlePaired.pMateOrient + --params=$singlePaired.pParams.pSettingsType + #if $singlePaired.pParams.pSettingsType == "full": + --skip=$singlePaired.pParams.pSkip + --alignLimit=$singlePaired.pParams.pAlignLimit + --trimH=$singlePaired.pParams.pTrimH + --trimL=$singlePaired.pParams.pTrimL + --mismatchSeed=$singlePaired.pParams.pMismatchSeed + --mismatchQual=$singlePaired.pParams.pMismatchQual + --seedLen=$singlePaired.pParams.pSeedLen + --rounding=$singlePaired.pParams.pRounding + --maqSoapAlign=$singlePaired.pParams.pMaqSoapAlign + --minInsert=$singlePaired.pParams.pMinInsert + --maxAlignAttempt=$singlePaired.pParams.pMaxAlignAttempt + --forwardAlign=$singlePaired.pParams.pForwardAlign + --reverseAlign=$singlePaired.pParams.pReverseAlign + --tryHard=$singlePaired.pParams.pTryHard + --valAlign=$singlePaired.pParams.pValAlign + --allValAligns=$singlePaired.pParams.pAllValAligns + --suppressAlign=$singlePaired.pParams.pSuppressAlign + --best=$singlePaired.pParams.pBestOption.pBest + #if $singlePaired.pParams.pBestOption.pBest == "doBest": + --maxBacktracks=$singlePaired.pParams.pBestOption.pdMaxBacktracks + --strata=$singlePaired.pParams.pBestOption.pdStrata + #else: + --maxBacktracks=$singlePaired.pParams.pBestOption.pnMaxBacktracks + #end if + --offrate=$singlePaired.pParams.pOffrate + --seed=$singlePaired.pParams.pSeed + --snpphred=$singlePaired.pParams.pSnpphred + --snpfrac=$singlePaired.pParams.pSnpfrac + --keepends=$singlePaired.pParams.pKeepends + #end if + #end if + </command> + <inputs> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" 
help="Built-ins were indexed using default options"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select the reference genome" help="if your genome of interest is not listed - contact Galaxy team"> + <options from_data_table="bowtie_indexes_color"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="bowtie_color_index,fasta" metadata_name="dbkey" label="Select the reference genome" /> + <conditional name="indexParams"> + <param name="indexSettings" type="select" label="Choose whether to use Default options for building indices or to Set your own" help="These settings are ignored when using a prebuilt index"> + <option value="indexPreSet">Default</option> + <option value="indexFull">Set your own</option> + </param> + <when value="indexPreSet" /> + <when value="indexFull"> + <conditional name="autoBehavior"> + <param name="autoB" type="select" label="Choose to use automatic or specified behavior for some parameters (-a)" help="Allows you to set --packed, --bmax, --bmaxdivn, and --dcv"> + <option value="auto">Automatic behavior</option> + <option value="set">Set values (sets --noauto and allows others to be set)</option> + </param> + <when value="auto" /> + <when value="set"> + <param name="packed" type="select" label="Whether or not to use a packed representation for DNA strings (--packed)"> + <option value="unpacked">Use regular representation</option> + <option value="packed">Use packed representation</option> + </param> + <param name="bmax" type="integer" value="-1" label="Maximum number of suffixes allowed in a block (--bmax)" help="-1 for not specified. 
Must be at least 1" /> + <param name="bmaxdivn" type="integer" value="4" label="Maximum number of suffixes allowed in a block as a fraction of the length of the reference (--bmaxdivn)" /> + <param name="dcv" type="integer" value="1024" label="The period for the difference-cover sample (--dcv)" /> + </when> + </conditional> + <param name="nodc" type="select" label="Whether or not to disable the use of the difference-cover sample (--nodc)" help="Suffix sorting becomes quadratic-time in the worst case (with a very repetitive reference)"> + <option value="dc">Use difference-cover sample</option> + <option value="nodc">Disable difference-cover sample</option> + </param> + <param name="noref" type="select" label="Whether or not to build the part of the reference index used only in paired-end alignment (-r)"> + <option value="ref">Build all index files</option> + <option value="noref">Do not build paired-end alignment index files</option> + </param> + <param name="offrate" type="integer" value="5" label="How many rows get marked during annotation of some or all of the Burrows-Wheeler rows (-o)" /> + <param name="ftab" type="integer" value="10" label="The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query (-t)" help="ftab is 4^(n+1) bytes" /> + <param name="ntoa" type="select" label="Whether or not to convert Ns in the reference sequence to As (--ntoa)"> + <option value="no">Do not convert Ns</option> + <option value="yes">Convert Ns to As</option> + </param> + <param name="endian" type="select" label="Endianness to use when serializing integers to the index file (--big/--little)" help="Little is most appropriate for Intel- and AMD-based architecture"> + <option value="little">Little</option> + <option value="big">Big</option> + </param> + <param name="seed" type="integer" value="-1" label="Seed for the pseudorandom number generator (--seed)" help="Use -1 to use default" /> + <param name="cutoff" 
type="integer" value="-1" label="Number of first bases of the reference sequence to index (--cutoff)" help="Use -1 to use default" /> + </when> <!-- indexFull --> + </conditional> <!-- indexParams --> + </when> <!-- history --> + </conditional> <!-- refGenomeSource --> + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param name="sInput1" type="data" format="fastqcssanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> + <conditional name="sParams"> + <param name="sSettingsType" type="select" label="Bowtie settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list"> + <option value="preSet">Commonly used</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> + <when value="full"> + <param name="sSkip" type="integer" value="0" label="Skip the first n reads (-s)" /> + <param name="sAlignLimit" type="integer" value="-1" label="Only align the first n reads (-u)" help="-1 for off" /> + <param name="sTrimH" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)" /> + <param name="sTrimL" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)" /> + <param name="sMismatchSeed" type="integer" value="2" label="Maximum number of mismatches permitted in the seed (-n)" help="May be 0, 1, 2, or 3" /> + <param name="sMismatchQual" type="integer" value="70" label="Maximum permitted total of quality values at mismatched read positions (-e)" /> + <param name="sSeedLen" type="integer" value="28" label="Seed length (-l)" help="Minimum value is 5" /> + <param name="sRounding" type="select" label="Whether or not to round to the nearest 10 and 
saturating at 30 (--nomaqround)"> + <option value="round">Round to nearest 10</option> + <option value="noRound">Do not round to nearest 10</option> + </param> + <param name="sMaqSoapAlign" type="integer" value="-1" label="Number of mismatches for SOAP-like alignment policy (-v)" help="-1 for default MAQ-like alignment policy" /> + <param name="sTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode"> + <option value="noTryHard">Do not try hard</option> + <option value="doTryHard">Try hard</option> + </param> + <param name="sValAlign" type="integer" value="1" label="Report up to n valid alignments per read (-k)" /> + <param name="sAllValAligns" type="select" label="Whether or not to report all valid alignments per read (-a)"> + <option value="noAllValAligns">Do not report all valid alignments</option> + <option value="doAllValAligns">Report all valid alignments</option> + </param> + <param name="sSuppressAlign" type="integer" value="-1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="-1 for no limit" /> + <param name="sMaxFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads with a number of valid alignments exceeding the limit set with the -m option to a file (--max)" /> + <param name="sUnmappedFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file (--un)" /> + <conditional name="sBestOption"> + <param name="sBest" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. 
Runs slower with best option"> + <option value="noBest">Do not use best</option> + <option value="doBest">Use best</option> + </param> + <when value="noBest"> + <param name="snMaxBacktracks" type="integer" value="125" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> + </when> + <when value="doBest"> + <param name="sdMaxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> + <param name="sdStrata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)"> + <option value="noStrata">Do not use strata option</option> + <option value="doStrata">Use strata option</option> + </param> + </when> + </conditional> <!-- sBestOption --> + <param name="sOffrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" /> + <param name="sSeed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" /> + <param name="sSnpphred" type="integer" value="-1" label="SNP penalty (ratio of SNPs per base in the subject genome) (--snpphred)" help="Enter this OR Ratio of SNPs per base" /> + <param name="sSnpfrac" type="float" value="0.001" label="Ratio of SNPs per base (estimated ratio for colorspace alignments) (--snpfrac)" help="Enter this OR SNP penalty" /> + <param name="sKeepends" type="select" label="Keep the extreme-ends nucleotides and qualities rather than trimming them (--col-keepends)"> + <option value="doKeepends">Keep ends</option> + <option value="noKeepends">Trim ends</option> + </param> + </when> <!-- full --> + </conditional> <!-- sParams --> + </when> <!-- single --> + <when value="paired"> + <param name="pInput1" type="data" format="fastqcssanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> + <param name="pInput2" type="data" 
format="fastqcssanger" label="Reverse FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> + <param name="pMaxInsert" type="integer" value="1000" label="Maximum insert size for valid paired-end alignments (-X)" /> + <param name="pMateOrient" type="select" label="The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand (--fr/--rf/--ff)"> + <option value="ff">FF (for SOLiD)</option> + <option value="fr">FR (for Illumina)</option> + <option value="rf">RF</option> + </param> + <conditional name="pParams"> + <param name="pSettingsType" type="select" label="Bowtie settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list"> + <option value="preSet">Commonly used</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> + <when value="full"> + <param name="pSkip" type="integer" value="0" label="Skip the first n pairs (-s)" /> + <param name="pAlignLimit" type="integer" value="-1" label="Only align the first n pairs (-u)" help="-1 for off" /> + <param name="pTrimH" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)" /> + <param name="pTrimL" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)" /> + <param name="pMismatchSeed" type="integer" value="2" label="Maximum number of mismatches permitted in the seed (-n)" help="May be 0, 1, 2, or 3" /> + <param name="pMismatchQual" type="integer" value="70" label="Maximum permitted total of quality values at mismatched read positions (-e)" /> + <param name="pSeedLen" type="integer" value="28" label="Seed length (-l)" help="Minimum value is 5" /> + <param name="pRounding" type="select" label="Whether or not to round to the nearest 10 and saturating at 30 (--nomaqround)"> + <option value="round">Round to nearest 10</option> + <option 
value="noRound">Do not round to nearest 10</option> + </param> + <param name="pMaqSoapAlign" type="integer" value="-1" label="Number of mismatches for SOAP-like alignment policy (-v)" help="-1 for default MAQ-like alignment policy" /> + <param name="pMinInsert" type="integer" value="0" label="Minimum insert size for valid paired-end alignments (-I)" /> + <param name="pMaxAlignAttempt" type="integer" value="100" label="Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate (--pairtries)" /> + <param name="pForwardAlign" type="select" label="Choose whether or not to attempt to align the forward reference strand (--nofw)"> + <option value="forward">Align against the forward reference strand</option> + <option value="noForward">Do not align against the forward reference strand</option> + </param> + <param name="pReverseAlign" type="select" label="Choose whether or not to align against the reverse-complement reference strand (--norc)"> + <option value="reverse">Align against the reverse-complement reference strand</option> + <option value="noReverse">Do not align against the reverse-complement reference strand</option> + </param> + <param name="pTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode"> + <option value="noTryHard">Do not try hard</option> + <option value="doTryHard">Try hard</option> + </param> + <param name="pValAlign" type="integer" value="1" label="Report up to n valid alignments per pair (-k)" /> <!-- label wording kept parallel to sValAlign in the single-end branch --> + <param name="pAllValAligns" type="select" label="Whether or not to report all valid alignments per pair (-a)"> + <option value="noAllValAligns">Do not report all valid alignments</option> + <option value="doAllValAligns">Report all valid alignments</option> + </param> + <param name="pSuppressAlign" type="integer" value="-1" label="Suppress all alignments for a pair if more than n 
reportable alignments exist (-m)" help="-1 for no limit" /> + <param name="pMaxFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads with a number of valid alignments exceeding the limit set with the -m option to a file (--max)" /> + <param name="pUnmappedFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file (--un)" /> + <conditional name="pBestOption"> + <param name="pBest" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. Runs slower with best option"> + <option value="noBest">Do not use best</option> + <option value="doBest">Use best</option> + </param> + <when value="noBest"> + <param name="pnMaxBacktracks" type="integer" value="125" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> + </when> + <when value="doBest"> + <param name="pdMaxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> + <param name="pdStrata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)"> + <option value="noStrata">Do not use strata option</option> + <option value="doStrata">Use strata option</option> + </param> + </when> + </conditional> <!-- pBestOption --> + <param name="pOffrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" /> + <param name="pSeed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" /> + <param name="pSnpphred" type="integer" value="-1" label="SNP penalty (ratio of SNPs per base in the subject genome) 
(--snpphred)" help="Enter this OR Ratio of SNPs per base" /> + <param name="pSnpfrac" type="float" value="0.001" label="Ratio of SNPs per base (estimated ratio for colorspace alignments) (--snpfrac)" help="Enter this OR SNP penalty" /> + <param name="pKeepends" type="select" label="Keep the extreme-ends nucleotides and qualities rather than trimming them (--col-keepends)"> + <option value="doKeepends">Keep ends</option> + <option value="noKeepends">Trim ends</option> + </param> + </when> <!-- full --> + </conditional> <!-- pParams --> + </when> <!-- paired --> + </conditional> <!-- singlePaired --> + <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="Bowtie produces SAM with several lines of header information by default" /> + </inputs> + <outputs> + <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads"> + <actions> + <conditional name="refGenomeSource.genomeSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="bowtie_indexes_color" column="1" offset="0"> + <filter type="param_value" column="0" value="#" filter_by="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.index" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + <data format="fastqcssanger" name="output_suppressed_reads_l" label="${tool.name} on ${on_string}: suppressed reads (L)"> + <filter>(( + singlePaired['sPaired'] == "single" and + singlePaired['sParams']['sSettingsType'] == "full" and + singlePaired['sParams']['sMaxFile'] is True + ) or ( + singlePaired['sPaired'] == "paired" and + singlePaired['pParams']['pSettingsType'] == "full" and + singlePaired['pParams']['pMaxFile'] is 
True + )) + </filter> + </data> + <data format="fastqcssanger" name="output_suppressed_reads_r" label="${tool.name} on ${on_string}: suppressed reads (R)"> + <filter>singlePaired['sPaired'] == "paired"</filter> + <filter>singlePaired['pParams']['pSettingsType'] == "full"</filter> + <filter>singlePaired['pParams']['pMaxFile'] is True</filter> + </data> + <data format="fastqcssanger" name="output_unmapped_reads_l" label="${tool.name} on ${on_string}: unmapped reads (L)"> + <filter> + (( + singlePaired['sPaired'] == "single" and + singlePaired['sParams']['sSettingsType'] == "full" and + singlePaired['sParams']['sUnmappedFile'] is True + ) or ( + singlePaired['sPaired'] == "paired" and + singlePaired['pParams']['pSettingsType'] == "full" and + singlePaired['pParams']['pUnmappedFile'] is True + )) + </filter> + </data> + <data format="fastqcssanger" name="output_unmapped_reads_r" label="${tool.name} on ${on_string}: unmapped reads (R)"> + <filter>singlePaired['sPaired'] == "paired"</filter> + <filter>singlePaired['pParams']['pSettingsType'] == "full"</filter> + <filter>singlePaired['pParams']['pUnmappedFile'] is True</filter> + </data> + </outputs> + <tests> + <test> + <!-- + Bowtie command: + bowtie -q -p 4 -S +sam-nohead -C chrM_color test-data/bowtie_in1.fastqcssanger > bowtie_out1_u.sam + sort bowtie_out1_u.sam > bowtie_out1.sam + -p is the number of threads, which is hardcoded above. You need to replace the + with 2 dashes. + chrM_color needs to be the base location/name of the index files. 
+ --> + <param name="genomeSource" value="indexed" /> + <param name="index" value="equCab2chrM" /> + <param name="sPaired" value="single" /> + <param name="sInput1" ftype="fastqcssanger" value="bowtie_in1.fastqcssanger" /> + <param name="sSettingsType" value="preSet" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out1.sam" sort="True" /> + </test> + <test> + <!-- + Bowtie command: + bowtie-build -C -f test-data/chr_m.fasta chrM_color + bowtie -q -X 1000 +ff -p 4 -S +sam-nohead -C -n 2 -e 70 -l 28 -X 250 +pairtries 100 +maxbts 125 -k 1 +snpfrac 0.001 +col-keepends +un bowtie_out3_u.fastq chrM_color -1 test-data/bowtie_in3.fastqcssanger -2 test-data/bowtie_in4.fastqcssanger > bowtie_out2_u.sam + sort bowtie_out2_u.sam > bowtie_out2.sam + sort bowtie_out3_u_1.sam > bowtie_out3_1.sam + sort bowtie_out3_u_2.sam > bowtie_out3_2.sam + Then also need to modify bowtie_out3_1.sam and bowtie_out3_2.sam so that all @ lines come before sequence lines. + The two unmapped output files will be named bowtie_out4_1.fastq and bowtie_out4_2.fastq + -p is the number of threads, hardcoded above. You need to replace the + with 2 dashes. + chrM_base is the index files' location/base name. 
+ --> + <param name="genomeSource" value="history" /> + <param name="ownFile" value="chr_m.fasta" /> + <param name="indexSettings" value="indexPreSet" /> + <param name="sPaired" value="paired" /> + <param name="pInput1" ftype="fastqcssanger" value="bowtie_in3.fastqcssanger" /> + <param name="pInput2" ftype="fastqcssanger" value="bowtie_in4.fastqcssanger" /> + <param name="pMaxInsert" value="1000" /> + <param name="pMateOrient" value="ff" /> + <param name="pSettingsType" value="full" /> + <param name="pSkip" value="0" /> + <param name="pAlignLimit" value="-1" /> + <param name="pTrimH" value="0" /> + <param name="pTrimL" value="0" /> + <param name="pMismatchSeed" value="2" /> + <param name="pMismatchQual" value="70" /> + <param name="pSeedLen" value="28" /> + <param name="pRounding" value="round" /> + <param name="pMaqSoapAlign" value="-1" /> + <param name="pMinInsert" value="0" /> + <param name="pMaxAlignAttempt" value="100" /> + <param name="pForwardAlign" value="forward" /> + <param name="pReverseAlign" value="reverse" /> + <param name="pTryHard" value="noTryHard" /> + <param name="pValAlign" value="1" /> + <param name="pAllValAligns" value="noAllValAligns" /> + <param name="pSuppressAlign" value="-1" /> + <param name="pUnmappedFile" value="true" /> + <param name="pMaxFile" value="false" /> + <param name="pBest" value="noBest" /> + <param name="pnMaxBacktracks" value="125" /> + <param name="pOffrate" value="-1" /> + <param name="pSeed" value="-1" /> + <param name="pSnpphred" value="-1" /> + <param name="pSnpfrac" value="0.001" /> + <param name="pKeepends" value="doKeepends" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out2.sam" sort="True" /> + <output name="output_unmapped_reads_l" ftype="fastqcssanger" file="bowtie_out3_1.fastq" sort="True" /> + <output name="output_unmapped_reads_r" ftype="fastqcssanger" file="bowtie_out3_2.fastq" sort="True" /> + </test> + <test> + <!-- + Bowtie command: + bowtie -q -p 4 -S 
+sam-nohead -C -n 2 -e 70 -l 28 +maxbts 125 -k 1 +snpfrac 0.001 +col-keepends chrM_color test-data/bowtie_in1.fastqcssanger > bowtie_out4_u.sam + sort bowtie_out4_u.sam > bowtie_out4.sam + -p is the number of threads, hardcoded above. You need to replace the + with 2 dashes. + chrM_base is the index files' location/base name. + --> + <param name="genomeSource" value="indexed" /> + <param name="index" value="equCab2chrM" /> + <param name="sPaired" value="single" /> + <param name="sInput1" ftype="fastqcssanger" value="bowtie_in1.fastqcssanger" /> + <param name="sSettingsType" value="full" /> + <param name="sSkip" value="0" /> + <param name="sAlignLimit" value="-1" /> + <param name="sTrimH" value="0" /> + <param name="sTrimL" value="0" /> + <param name="sMismatchSeed" value="2" /> + <param name="sMismatchQual" value="70" /> + <param name="sSeedLen" value="28" /> + <param name="sRounding" value="round" /> + <param name="sMaqSoapAlign" value="-1" /> + <param name="sTryHard" value="noTryHard" /> + <param name="sValAlign" value="1" /> + <param name="sAllValAligns" value="noAllValAligns" /> + <param name="sSuppressAlign" value="-1" /> + <param name="sUnmappedFile" value="false" /> + <param name="sMaxFile" value="false" /> + <param name="sBest" value="noBest" /> + <param name="snMaxBacktracks" value="125" /> + <param name="sOffrate" value="-1" /> + <param name="sSeed" value="-1" /> + <param name="sSnpphred" value="-1" /> + <param name="sSnpfrac" value="0.001" /> + <param name="sKeepends" value="doKeepends" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out4.sam" sort="True" /> + </test> + <test> + <!-- + Bowtie command: + bowtie-build +noauto +bmaxdivn 4 +dcv 1024 +offrate 5 +ftabchars 10 +little -C -f test-data/chr_m.fasta chrM_color + bowtie -q -X 1000 +ff -p 4 -S +sam-nohead -C chrM_color -1 test-data/bowtie_in3.fastqcssanger -2 test-data/bowtie_in4.fastqcssanger > bowtie_out5_u.sam + sort bowtie_out5_u.sam > 
bowtie_out5.sam + -p is the number of threads, hardcoded above. You need to replace the + with 2 dashes. + chrM_base is the index files' location/base name. + --> + <param name="genomeSource" value="history" /> + <param name="ownFile" value="chr_m.fasta" /> + <param name="indexSettings" value="indexFull" /> + <param name="autoB" value="set" /> + <param name="packed" value="unpacked" /> + <param name="bmax" value="-1" /> + <param name="bmaxdivn" value="4" /> + <param name="dcv" value="1024" /> + <param name="nodc" value="dc" /> + <param name="noref" value="ref" /> + <param name="offrate" value="5" /> + <param name="ftab" value="10" /> + <param name="ntoa" value="no" /> + <param name="endian" value="little" /> + <param name="seed" value="-1" /> + <param name="cutoff" value="-1" /> + <param name="sPaired" value="paired" /> + <param name="pInput1" ftype="fastqcssanger" value="bowtie_in3.fastqcssanger" /> + <param name="pInput2" ftype="fastqcssanger" value="bowtie_in4.fastqcssanger" /> + <param name="pMaxInsert" value="1000" /> + <param name="pMateOrient" value="ff" /> + <param name="pSettingsType" value="preSet" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out5.sam" sort="True" /> + </test> + </tests> + + <help> + +**What it does** + +Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient. It is developed by Ben Langmead and Cole Trapnell. Please cite: Langmead B, Trapnell C, Pop M, Salzberg SL. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biology 10:R25. + +.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. 
A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + + .. __: http://bowtie-bio.sourceforge.net/index.shtml + +------ + +**Input formats** + +Bowtie accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files. + +------ + +**A Note on Built-in Reference Genomes** + +The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY. 
+ +------ + +**Outputs** + +The output is in SAM format, and has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 QNAME Query (pair) NAME + 2 FLAG bitwise FLAG + 3 RNAME Reference sequence NAME + 4 POS 1-based leftmost POSition/coordinate of clipped sequence + 5 MAPQ MAPping Quality (Phred-scaled) + 6 CIGAR extended CIGAR string + 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS 1-based Mate POSition + 9 ISIZE Inferred insert SIZE + 10 SEQ query SEQuence on the same strand as the reference + 11 QUAL query QUALity (ASCII-33 gives the Phred base quality) + 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE + +The flags are as follows:: + + Flag Description + ------ ------------------------------------- + 0x0001 the read is paired in sequencing + 0x0002 the read is mapped in a proper pair + 0x0004 the query sequence itself is unmapped + 0x0008 the mate is unmapped + 0x0010 strand of the query (1 for reverse) + 0x0020 strand of the mate + 0x0040 the read is the first read in a pair + 0x0080 the read is the second read in a pair + 0x0100 the alignment is not primary + +It looks like this (scroll sideways to see the entire example):: + + QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT + HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh + HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh + +------- + +**Bowtie settings** + +All of the options have a default value. You can change any of them. Most of the options in Bowtie have been implemented here. 
+ +------ + +**Bowtie parameter list** + +This is an exhaustive list of Bowtie options: + +For indexing (bowtie-build):: + + -a No auto behavior. Disable the default behavior where bowtie automatically + selects values for --bmax/--bmaxdivn/--dcv/--packed parameters according + to the memory available. [off] + --packed Packing. Use a packed representation for DNA strings. [auto] + --bmax INT Suffix maximum. The maximum number of suffixes allowed in a block. [auto] + --bmaxdivn INT Suffix maximum fraction. The maximum number of suffixes allowed in a block + expressed as a fraction of the length of the reference. [4] + --dcv INT Difference-cover sample. Use INT as the period for the difference-cover + sample. [1024] + --nodc No difference-cover sample. Disable the difference-cover sample. [off] + -r No reference indexes. Do not build the NAME.3.ebwt and NAME.4.ebwt portions + of the index. Used only for paired-end alignment. [off] + -o INT Offrate. How many Burrows-Wheeler rows get marked by the indexer. The + indexer will mark every 2^INT rows. The marked rows correspond to rows on + the genome. [5] + -t INT Ftab. The lookup table used to calculate an initial Burrows-Wheeler range + with respect to the first INT characters of the query. Ftab is 4^INT+1 + bytes. [10] + --ntoa N conversion. Convert Ns to As before building the index. Otherwise, Ns are + simply excluded from the index and Bowtie will not find alignments that + overlap them. [off] + --big Endianness. Endianness to use when serializing integers to the index file. [off] + --little Endianness. [--little] + --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] + --cutoff INT Cutoff. Index only the first INT bases of the reference sequences (cumulative + across sequences) and ignore the rest. [off] + +For aligning (bowtie):: + + -s INT Skip. Do not align the first INT reads or pairs in the input. [off] + -u INT Align limit. 
Only align the first INT reads/pairs from the input. [no limit] + -5 INT High-quality trim. Trim INT bases from the high-quality (left) end of each + read before alignment. [0] + -3 INT Low-quality trim. Trim INT bases from the low-quality (right) end of each + read before alignment. [0] + -n INT Mismatch seed. Maximum number of mismatches permitted in the seed (defined + with seed length option). Can be 0, 1, 2, or 3. [2] + -e INT Mismatch quality. Maximum permitted total of quality values at mismatched + read positions. Bowtie rounds quality values to the nearest 10 and saturates + at 30. [70] + -l INT Seed length. The number of bases on the high-quality end of the read to + which the -n ceiling applies. Must be at least 5. [28] + --nomaqround Suppress MAQ rounding. Values are internally rounded to the nearest 10 and + saturate at 30. This option turns off that rounding. [off] + -v INT MAQ- or SOAP-like alignment policy. This option turns off the default + MAQ-like alignment policy in favor of a SOAP-like one. End-to-end alignments + with at most INT mismatches. [off] + -I INT Minimum insert. The minimum insert size for valid paired-end alignments. + Does checking on untrimmed reads if -5 or -3 is used. [0] + -X INT Maximum insert. The maximum insert size for valid paired-end alignments. + Does checking on untrimmed reads if -5 or -3 is used. [250] + --fr Mate orientation. The upstream/downstream mate orientations for a valid + paired-end alignment against the forward reference strand. [--fr] + --rf Mate orientation. [off] + --ff Mate orientation. [off] + --pairtries INT Maximum alignment attempts for paired-end data. [100] + --nofw No forward aligning. Choosing this option means that Bowtie will not attempt + to align against the forward reference strand. [off] + --norc No reverse-complement aligning. Setting this will mean that Bowtie will not + attempt to align against the reverse-complement reference strand. [off] + --maxbts INT Maximum backtracks. 
The maximum number of backtracks permitted when aligning + a read in -n 2 or -n 3 mode. [125 without --best] [800 with --best] + -y Try hard. Try as hard as possible to find valid alignments when they exist, + including paired-end alignments. [off] + --chunkmbs INT Thread memory. The number of megabytes of memory a given thread is given to + store path descriptors in --best mode. [32] + -k INT Valid alignments. The number of valid alignments per read or pair. [off] + -a All valid alignments. Choosing this means that all valid alignments per read + or pair will be reported. [off] + -m INT Suppress alignments. Suppress all alignments for a particular read or pair + if more than INT reportable alignments exist for it. [no limit] + --best Best mode. Make Bowtie guarantee that reported singleton alignments are + "best" in terms of stratum (the number of mismatches) and quality values at + mismatched position. [off] + --strata Best strata. When running in best mode, report alignments that fall into the + best stratum if there are ones falling into more than one. [off] + -o INT Offrate override. Override the offrate of the index with INT. Some row + markings are discarded when index read into memory. INT must be greater than + the value used to build the index (default: 5). [off] + --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] + --snpphred INT Use INT as the SNP penalty for decoding colorspace alignments. True ratio of + SNPs per base in the subject genome. [see --snpfrac] + --snpfrac DEC Use DEC as the estimated ratio of SNPs per base when decoding colorspace + alignments. [0.001] + --col-keepends Keep the extreme-end nucleotides and qualities when decoding colorspace + alignments. [off] + + </help> +</tool>
#!/usr/bin/env python
# bowtie_wrapper.py -- added in the changeset dated Mon Nov 26 09:47:13 2012 -0500.

"""
Runs Bowtie on single-end or paired-end data.
For use with Bowtie v. 0.12.7

usage: bowtie_wrapper.py [options]
    -t, --threads=t: The number of threads to run
    -o, --output=o: The output file
    --output_unmapped_reads=: File name for unmapped reads (single-end)
    --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
    --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
    --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
    --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
    --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
    -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
    -I, --input2=I: The reverse reads file in Sanger FASTQ format
    -4, --dataType=4: The type of data (SOLiD or Solexa)
    -2, --paired=2: Whether the data is single- or paired-end
    -g, --genomeSource=g: The type of reference provided
    -r, --ref=r: The reference genome to use or index
    -s, --skip=s: Skip the first n reads
    -a, --alignLimit=a: Only align the first n reads
    -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
    -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
    -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
    -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
    -l, --seedLen=l: Seed length
    -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
    -P, --maqSoapAlign=P: Choose MAQ- or SOAP-like alignment policy
    -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
    -v, --valAlign=v: Report up to n valid alignments per read
    -V, --allValAligns=V: Whether or not to report all valid alignments per read
    -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
    -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
    -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
    -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
    -j, --minInsert=j: Minimum insert size for valid paired-end alignments
    -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
    -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
    -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
    -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
    -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
    -F, --offrate=F: Override the offrate of the index to n
    -8, --snpphred=8: SNP penalty on Phred scale
    -6, --snpfrac=6: Fraction of sites expected to be SNP sites
    -7, --keepends=7: Keep extreme-end nucleotides and qualities
    -S, --seed=S: Seed for pseudo-random number generator
    -C, --params=C: Whether to use default or specified parameters
    -u, --iautoB=u: Automatic or specified behavior
    -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
    -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
    -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
    -D, --idcv=D: The period for the difference-cover sample
    -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
    -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
    -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
    -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
    -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
    -N, --iendian=N: Endianness to use when serializing integers to the index file
    -Z, --iseed=Z: Seed for the pseudorandom number generator
    -c, --icutoff=c: Number of first bases of the reference sequence to index
    -x, --indexSettings=x: Whether or not indexing options are to be set
    -H, --suppressHeader=H: Suppress header
    --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
"""

import optparse, os, shutil, subprocess, sys, tempfile

#Allow more than Sanger encoded variants
DEFAULT_ASCII_ENCODING = '--phred33-quals'
GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
#FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config

def stop_err( msg ):
    """Write msg (plus a newline) to stderr and terminate with a non-zero exit status."""
    sys.stderr.write( '%s\n' % msg )
    # A bare sys.exit() would report success (status 0) for a failed run.
    sys.exit( 1 )

def _remove_dir( path ):
    """Delete the directory tree at path if it still exists."""
    if os.path.exists( path ):
        shutil.rmtree( path )

def _run_shell_command( cmd, cwd ):
    """
    Run cmd through the shell with cwd as working directory.

    stderr is redirected to a scratch file created inside cwd and read back
    in fixed-size chunks, allowing for the case where it is very large.

    Returns a ( returncode, stderr ) tuple.
    """
    # Only the generated name is used; the file is re-created by open() below.
    tmp = tempfile.NamedTemporaryFile( dir=cwd ).name
    tmp_stderr = open( tmp, 'wb' )
    try:
        proc = subprocess.Popen( args=cmd, shell=True, cwd=cwd, stderr=tmp_stderr.fileno() )
        returncode = proc.wait()
    finally:
        tmp_stderr.close()
    # get stderr, allowing for case where it's very large
    tmp_stderr = open( tmp, 'rb' )
    stderr = ''
    buffsize = 1048576
    try:
        try:
            while True:
                chunk = tmp_stderr.read( buffsize )
                # Stop on end-of-file. Testing the accumulated length against the
                # buffer size would spin forever on an exact multiple of it.
                if not chunk:
                    break
                stderr += chunk
        except OverflowError:
            pass
    finally:
        tmp_stderr.close()
    return returncode, stderr

def __main__():
    """
    Parse the command line, optionally build a Bowtie index for a reference
    taken from the history, run the alignment and put the optional
    unmapped/suppressed read files in place.

    All scratch files live in a temporary directory that is removed before
    the function returns or exits through stop_err().
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
    parser.add_option( '-o', '--output', dest='output', help='The output file' )
    parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
    parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
    parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
    parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
    parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
    parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
    parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
    parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
    parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
    parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
    parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
    parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
    parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
    parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
    parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
    parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
    parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
    parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
    parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
    parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
    parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' )
    parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
    parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read' )
    parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' )
    parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
    parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
    parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
    parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
    parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
    parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
    parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
    parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
    parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
    parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
    parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
    parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
    parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
    parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
    parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
    parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
    parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
    parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
    parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
    parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
    parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
    parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
    parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
    parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
    parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
    parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
    parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
    parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
    parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' )
    parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
    parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
    parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
    parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
    (options, args) = parser.parse_args()
    stdout = ''

    # make temp directory for placement of indices and copy reference file there if necessary
    tmp_index_dir = tempfile.mkdtemp()
    # get type of data (solid or solexa)
    if options.dataType == 'solid':
        colorspace = '-C'
    else:
        colorspace = ''
    # index if necessary
    if options.genomeSource == 'history' and not options.do_not_build_index:
        # set up commands
        if options.index_settings =='indexPreSet':
            indexing_cmds = '%s' % colorspace
        else:
            try:
                if options.iautoB and options.iautoB == 'set':
                    iautoB = '--noauto'
                else:
                    iautoB = ''
                if options.ipacked and options.ipacked == 'packed':
                    ipacked = '--packed'
                else:
                    ipacked = ''
                if options.ibmax and int( options.ibmax ) >= 1:
                    ibmax = '--bmax %s' % options.ibmax
                else:
                    ibmax = ''
                if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
                    ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
                else:
                    ibmaxdivn = ''
                if options.idcv and int( options.idcv ) > 0:
                    idcv = '--dcv %s' % options.idcv
                else:
                    idcv = ''
                if options.inodc and options.inodc == 'nodc':
                    inodc = '--nodc'
                else:
                    inodc = ''
                if options.inoref and options.inoref == 'noref':
                    inoref = '--noref'
                else:
                    inoref = ''
                if options.iftab and int( options.iftab ) >= 0:
                    iftab = '--ftabchars %s' % options.iftab
                else:
                    iftab = ''
                if options.intoa and options.intoa == 'yes':
                    intoa = '--ntoa'
                else:
                    intoa = ''
                if options.iendian and options.iendian == 'big':
                    iendian = '--big'
                else:
                    iendian = '--little'
                if options.iseed and int( options.iseed ) > 0:
                    iseed = '--seed %s' % options.iseed
                else:
                    iseed = ''
                if options.icutoff and int( options.icutoff ) > 0:
                    icutoff = '--cutoff %s' % options.icutoff
                else:
                    icutoff = ''
                indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \
                                ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
                                  inoref, options.ioffrate, iftab, intoa, iendian,
                                  iseed, icutoff, colorspace )
            except ValueError:
                # sys.exc_info() keeps the handler parseable on every Python version
                e = sys.exc_info()[1]
                # clean up temp dir
                _remove_dir( tmp_index_dir )
                stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
        # Reserve a unique name inside the temp dir, then point it at the reference.
        ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
        ref_file_name = ref_file.name
        ref_file.close()
        os.symlink( options.ref, ref_file_name )
        cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
        try:
            returncode, stderr = _run_shell_command( cmd1, tmp_index_dir )
            if returncode != 0:
                raise Exception( stderr )
        except Exception:
            e = sys.exc_info()[1]
            # clean up temp dir
            _remove_dir( tmp_index_dir )
            stop_err( 'Error indexing reference sequence\n' + str( e ) )
        stdout += 'File indexed. '
    else:
        ref_file_name = options.ref
    # set up aligning and generate aligning command options
    # automatically set threads in both cases
    tmp_suppressed_file_name = None
    tmp_unmapped_file_name = None
    if options.suppressHeader == 'true':
        suppressHeader = '--sam-nohead'
    else:
        suppressHeader = ''
    if options.maxInsert and int( options.maxInsert ) > 0:
        maxInsert = '-X %s' % options.maxInsert
    else:
        maxInsert = ''
    if options.mateOrient:
        mateOrient = '--%s' % options.mateOrient
    else:
        mateOrient = ''
    quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
    if options.params == 'preSet':
        aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
                ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
    else:
        try:
            if options.skip and int( options.skip ) > 0:
                skip = '-s %s' % options.skip
            else:
                skip = ''
            if options.alignLimit and int( options.alignLimit ) >= 0:
                alignLimit = '-u %s' % options.alignLimit
            else:
                alignLimit = ''
            if options.trimH and int( options.trimH ) > 0:
                trimH = '-5 %s' % options.trimH
            else:
                trimH = ''
            if options.trimL and int( options.trimL ) > 0:
                trimL = '-3 %s' % options.trimL
            else:
                trimL = ''
            # Guard on presence first so a missing option does not reach int();
            # '-1' is still rejected by the >= 0 test.
            if options.maqSoapAlign and int( options.maqSoapAlign ) >= 0:
                maqSoapAlign = '-v %s' % options.maqSoapAlign
            else:
                maqSoapAlign = ''
            if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
                        or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
                mismatchSeed = '-n %s' % options.mismatchSeed
            else:
                mismatchSeed = ''
            if options.mismatchQual and int( options.mismatchQual ) >= 0:
                mismatchQual = '-e %s' % options.mismatchQual
            else:
                mismatchQual = ''
            if options.seedLen and int( options.seedLen ) >= 5:
                seedLen = '-l %s' % options.seedLen
            else:
                seedLen = ''
            if options.rounding == 'noRound':
                rounding = '--nomaqround'
            else:
                rounding = ''
            if options.minInsert and int( options.minInsert ) > 0:
                minInsert = '-I %s' % options.minInsert
            else:
                minInsert = ''
            if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
                maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
            else:
                maxAlignAttempt = ''
            if options.forwardAlign == 'noForward':
                forwardAlign = '--nofw'
            else:
                forwardAlign = ''
            if options.reverseAlign == 'noReverse':
                reverseAlign = '--norc'
            else:
                reverseAlign = ''
            if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
                    ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
                maxBacktracks = '--maxbts %s' % options.maxBacktracks
            else:
                maxBacktracks = ''
            if options.tryHard == 'doTryHard':
                tryHard = '-y'
            else:
                tryHard = ''
            if options.valAlign and int( options.valAlign ) >= 0:
                valAlign = '-k %s' % options.valAlign
            else:
                valAlign = ''
            if options.allValAligns == 'doAllValAligns':
                allValAligns = '-a'
            else:
                allValAligns = ''
            if options.suppressAlign and int( options.suppressAlign ) >= 0:
                suppressAlign = '-m %s' % options.suppressAlign
            else:
                suppressAlign = ''
            if options.best == 'doBest':
                best = '--best'
            else:
                best = ''
            if options.strata == 'doStrata':
                strata = '--strata'
            else:
                strata = ''
            if options.offrate and int( options.offrate ) >= 0:
                offrate = '-o %s' % options.offrate
            else:
                offrate = ''
            if options.seed and int( options.seed ) >= 0:
                seed = '--seed %s' % options.seed
            else:
                seed = ''
            if options.paired == 'paired':
                # Paired runs write to a scratch '<name>.fastq'; the resulting
                # '<name>_1.fastq' / '<name>_2.fastq' files are moved into place
                # after the alignment has finished.
                if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
                    tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
                    tmp_unmapped_file_name = tmp_unmapped_file.name
                    tmp_unmapped_file.close()
                    output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
                else:
                    output_unmapped_reads = ''
                # Test the paired (_l/_r) destinations here: the single-end option
                # is not what a paired run supplies, so checking it left --max off.
                if options.output_suppressed_reads_l and options.output_suppressed_reads_r:
                    tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
                    tmp_suppressed_file_name = tmp_suppressed_file.name
                    tmp_suppressed_file.close()
                    output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
                else:
                    output_suppressed_reads = ''
            else:
                if options.output_unmapped_reads:
                    output_unmapped_reads = '--un %s' % options.output_unmapped_reads
                else:
                    output_unmapped_reads = ''
                if options.output_suppressed_reads:
                    output_suppressed_reads = '--max %s' % options.output_suppressed_reads
                else:
                    output_suppressed_reads = ''
            snpfrac = ''
            if options.snpphred and int( options.snpphred ) >= 0:
                snpphred = '--snpphred %s' % options.snpphred
            else:
                snpphred = ''
                # --snpfrac is only considered when no --snpphred value is given
                if options.snpfrac and float( options.snpfrac ) >= 0:
                    snpfrac = '--snpfrac %s' % options.snpfrac
            if options.keepends and options.keepends == 'doKeepends':
                keepends = '--col-keepends'
            else:
                keepends = ''
            aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
                            '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
                            ( maxInsert, mateOrient, options.threads, suppressHeader,
                              colorspace, skip, alignLimit, trimH, trimL, maqSoapAlign,
                              mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
                              maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
                              tryHard, valAlign, allValAligns, suppressAlign, best,
                              strata, offrate, seed, snpphred, snpfrac, keepends,
                              output_unmapped_reads, output_suppressed_reads,
                              quality_score_encoding )
        except ValueError:
            e = sys.exc_info()[1]
            # clean up temp dir
            _remove_dir( tmp_index_dir )
            stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
    try:
        # have to nest try-except in try-finally to handle 2.4
        try:
            # prepare actual mapping commands
            if options.paired == 'paired':
                cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
            else:
                cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
            # align
            returncode, stderr = _run_shell_command( cmd2, tmp_index_dir )
            if returncode != 0:
                raise Exception( stderr )
            # get suppressed and unmapped reads output files in place if appropriate
            if options.paired == 'paired' and tmp_suppressed_file_name and \
                   options.output_suppressed_reads_l and options.output_suppressed_reads_r:
                try:
                    left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
                    # the second mate is the '_2' file, as in the unmapped case below
                    right = tmp_suppressed_file_name.replace( '.fastq', '_2.fastq' )
                    shutil.move( left, options.output_suppressed_reads_l )
                    shutil.move( right, options.output_suppressed_reads_r )
                except Exception:
                    # best effort: a missing optional file must not fail the job
                    sys.stdout.write( 'Error producing the suppressed output file.\n' )
            if options.paired == 'paired' and tmp_unmapped_file_name and \
                   options.output_unmapped_reads_l and options.output_unmapped_reads_r:
                try:
                    left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
                    right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
                    shutil.move( left, options.output_unmapped_reads_l )
                    shutil.move( right, options.output_unmapped_reads_r )
                except Exception:
                    # best effort: a missing optional file must not fail the job
                    sys.stdout.write( 'Error producing the unmapped output file.\n' )
            # check that there are results in the output file
            if os.path.getsize( options.output ) == 0:
                raise Exception( 'The output file is empty, there may be an error with your input file or settings.' )
        except Exception:
            e = sys.exc_info()[1]
            stop_err( 'Error aligning sequence. ' + str( e ) )
    finally:
        # clean up temp dir
        _remove_dir( tmp_index_dir )
    stdout += 'Sequence file aligned.\n'
    sys.stdout.write( stdout )

if __name__=="__main__": __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_in1.fastqcssanger Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,12 @@ +@869_1532_1255/1 +G2102223311000312223321002 ++ +=;8:?@=?;;9:8;=>;5A?;<8>< +@1278_2032_148/1 +T221320102201031010032022 ++ +4=,'&+2#88)%$)''-0(56% +@1278_2032_216/1 +T311231031130211223011020 ++ +6;:8<?8<;55<6=7;>/>6997<5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_in3.fastqcssanger Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,12 @@ +@869_1532_1255/1 +G2102223311000312223321002 ++ +=;8:?@=?;;9:8;=>;5A?;<8>< +@1278_2032_148/1 +T221320102201031010032022 ++ +4=,'&+2#88)%$)''-0(56% +@1278_2032_216/1 +T311231031130211223011020 ++ +6;:8<?8<;55<6=7;>/>6997<5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_in4.fastqcssanger Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,12 @@ +@869_1532_1255/2 +T1301222000112122113330022 ++ +;89<:==5<8>69;8=<9;<>9:=< +@1278_2031_2013/1 +G0200321200313231222033313 ++ +;21)8/5#5;45,)945#2173#.92 +@1278_2032_148/2 +G0020213032312123000133222 ++ +9490<4=:<=;8;;@<6;0>699#</
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_out1.sam Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,3 @@ +1278_2032_148/1 4 * 0 0 * * 0 0 GCTGACAGGACATCACAATGAGG ,'&+2#88)%$)''-0(56% XM:i:0 +1278_2032_216/1 4 * 0 0 * * 0 0 CCGTCATCCTAGCCGGTACCAGA :8<?8<;55<6=7;>/>6997<5 XM:i:0 +869_1532_1255/1 16 chrM 3753 255 23M * 0 0 TTTGATAGAGTAAAACATAGAGG YUSVY_UOXZWRQRSUY[\^XQ! XA:i:1 MD:Z:23 NM:i:0 CM:i:1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_out2.sam Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,6 @@ +1278_2031_2013/1 141 * 0 0 * * 0 0 GAATGCGAATCTGTCGGGATTTCT 1)8/5#5;45,)945#2173#.92 XM:i:0 +1278_2032_148 141 * 0 0 * * 0 0 AGAGCTATGTCGCGTAAACTTGGG 90<4=:<=;8;;@<6;0>699#</ XM:i:0 +1278_2032_148 77 * 0 0 * * 0 0 GCTGACAGGACATCACAATGAGG ,'&+2#88)%$)''-0(56% XM:i:0 +1278_2032_216 77 * 0 0 * * 0 0 CCGTCATCCTAGCCGGTACCAGA :8<?8<;55<6=7;>/>6997<5 XM:i:0 +869_1532_1255 115 chrM 3752 255 25M = 3727 -50 CTTTGATAGAGTAAAACATAGAGGC <YUSVY_UOXZWRQRSUY[\^XQ!! XA:i:1 MD:Z:25 NM:i:0 CM:i:1 +869_1532_1255 179 chrM 3727 255 25M = 3752 50 GGAAATATGTCTGACAAAAGAGTTA !"VRVYVSTXTRSNSUSPQYVUTP8 XA:i:1 MD:Z:25 NM:i:0 CM:i:1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_out3_1.fastq Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,8 @@ ++ ++ +4=,'&+2#88)%$)''-0(56% +6;:8<?8<;55<6=7;>/>6997<5 +@1278_2032_148/1 +@1278_2032_216/1 +T221320102201031010032022 +T311231031130211223011020
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_out3_2.fastq Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,8 @@ ++ ++ +9490<4=:<=;8;;@<6;0>699#</ +;21)8/5#5;45,)945#2173#.92 +@1278_2031_2013/1 +@1278_2032_148/2 +G0020213032312123000133222 +G0200321200313231222033313
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_out4.sam Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,3 @@ +1278_2032_148/1 4 * 0 0 * * 0 0 GCTGACAGGACATCACAATGAGG ,'&+2#88)%$)''-0(56% XM:i:0 +1278_2032_216/1 4 * 0 0 * * 0 0 CCGTCATCCTAGCCGGTACCAGA :8<?8<;55<6=7;>/>6997<5 XM:i:0 +869_1532_1255/1 16 chrM 3752 255 25M * 0 0 CTTTGATAGAGTAAAACATAGAGGC <YUSVY_UOXZWRQRSUY[\^XQ!! XA:i:1 MD:Z:25 NM:i:0 CM:i:1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_out5.sam Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,6 @@ +1278_2031_2013/1 141 * 0 0 * * 0 0 GAATGCGAATCTGTCGGGATTTCT 1)8/5#5;45,)945#2173#.92 XM:i:0 +1278_2032_148 141 * 0 0 * * 0 0 AGAGCTATGTCGCGTAAACTTGGG 90<4=:<=;8;;@<6;0>699#</ XM:i:0 +1278_2032_148 77 * 0 0 * * 0 0 GCTGACAGGACATCACAATGAGG ,'&+2#88)%$)''-0(56% XM:i:0 +1278_2032_216 77 * 0 0 * * 0 0 CCGTCATCCTAGCCGGTACCAGA :8<?8<;55<6=7;>/>6997<5 XM:i:0 +869_1532_1255 115 chrM 3753 255 23M = 3727 -49 TTTGATAGAGTAAAACATAGAGG YUSVY_UOXZWRQRSUY[\^XQ! XA:i:1 MD:Z:23 NM:i:0 CM:i:1 +869_1532_1255 179 chrM 3728 255 23M = 3752 47 GAAATATGTCTGACAAAAGAGTT "VRVYVSTXTRSNSUSPQYVUTP XA:i:1 MD:Z:23 NM:i:0 CM:i:1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chr_m.fasta Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,335 @@ +>chrM +GTTAATGTAGCTTAATAATATAAAGCAAGGCACTGAAAATGCCTAGATGA +GTATTCTTACTCCATAAACACATAGGCTTGGTCCTAGCCTTTTTATTAGT +TATTAATAGAATTACACATGCAAGTATCCGCACCCCAGTGAGAATGCCCT +CTAAATCACGTCTCTACGATTAAAAGGAGCAGGTATCAAGCACACTAGAA +AGTAGCTCATAACACCTTGCTCAGCCACACCCCCACGGGACACAGCAGTG +ATAAAAATTAAGCTATGAACGAAAGTTCGACTAAGTCATATTAAATAAGG +GTTGGTAAATTTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTA +ATAAATCTCCGGCGTAAAGCGTGTCAAAGACTAATACCAAAATAAAGTTA +AAACCCAGTTAAGCCGTAAAAAGCTACAACCAAAGTAAAATAGACTACGA +AAGTGACTTTAATACCTCTGACTACACGATAGCTAAGACCCAAACTGGGA +TTAGATACCCCACTATGCTTAGCCCTAAACTAAAATAGCTTACCACAACA +AAGCTATTCGCCAGAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTT +GGCGGTGCTTTACATCCCTCTAGAGGAGCCTGTTCCATAATCGATAAACC +CCGATAAACCCCACCATCCCTTGCTAATTCAGCCTATATACCGCCATCTT +CAGCAAACCCTAAACAAGGTACCGAAGTAAGCACAAATATCCAACATAAA +AACGTTAGGTCAAGGTGTAGCCCATGGGATGGAGAGAAATGGGCTACATT +TTCTACCCTAAGAACAAGAACTTTAACCCGGACGAAAGTCTCCATGAAAC +TGGAGACTAAAGGAGGATTTAGCAGTAAATTAAGAATAGAGAGCTTAATT +GAATCAGGCCATGAAGCGCGCACACACCGCCCGTCACCCTCCTTAAATAT +CACAAATCATAACATAACATAAAACCGTGACCCAAACATATGAAAGGAGA +CAAGTCGTAACAAGGTAAGTATACCGGAAGGTGTACTTGGATAACCAAAG +TGTAGCTTAAACAAAGCATCCAGCTTACACCTAGAAGATTTCACTCAAAA +TGAACACTTTGAACTAAAGCTAGCCCAAACAATACCTAATTCAATTACCC +TTAGTCACTTAACTAAAACATTCACCAAACCATTAAAGTATAGGAGATAG +AAATTTTAACTTGGCGCTATAGAGAAAGTACCGTAAGGGAACGATGAAAG +ATGCATTAAAAGTACTAAACAGCAAAGCTTACCCCTTTTACCTTTTGCAT +AATGATTTAACTAGAATAAACTTAGCAAAGAGAACTTAAGCTAAGCACCC +CGAAACCAGACGAGCTACCTATGAACAGTTACAAATGAACCAACTCATCT +ATGTCGCAAAATAGTGAGAAGATTCGTAGGTAGAGGTGAAAAGCCCAACG +AGCCTGGTGATAGCTGGTTGTCCAGAAACAGAATTTCAGTTCAAATTTAA +ATTTACCTAAAAACTACTCAATTCTAATGTAAATTTAAATTATAGTCTAA +AAAGGTACAGCTTTTTAGATACAGGTTACAACCTTCATTAGAGAGTAAGA +ACAAGATAAACCCATAGTTGGCTTAAAAGCAGCCATCAATTAAGAAAGCG +TTCAAGCTCAACGACACATCTATCTTAATCCCAACAATCAACCCAAACTA +ACTCCTAATCTCATACTGGACTATTCTATCAACACATAGAAGCAATAATG 
+TTAATATGAGTAACAAGAATTATTTCTCCTTGCATAAGCTTATATCAGAA +CGAATACTCACTGATAGTTAACAACAAGATAGGGATAATCCAAAAACTAA +TCATCTATTTAAACCATTGTTAACCCAACACAGGCATGCATCTATAAGGA +AAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTT +ACCAAAAACATCACCTCTAGCATTTCCAGTATTAGAGGCACTGCCTGCCC +AGTGACATCTGTTtaaacggccgcggtatcctaaccgtgcaaaggtagca +taatcacttgttccctaaatagggacttgtatgaatggccacacgagggt +tttactgtctcttacttccaatcagtgaaattgaccttcccgtgaagagg +cgggaatgactaaataagacgagaagaccctatggagcttTAATTAACTG +ATTCACAAAAAACAACACACAAACCTTAACCTTCAGGGACAACAAAACTT +TTGATTGAATCAGCAATTTCGGTTGGGGTGACCTCGGAGAACAAAACAAC +CTCCGAGTGATTTAAATCCAGACTAACCAGTCAAAATATATAATCACTTA +TTGATCCAAACCATTGATCAACGGAACAAGTTACCCTAGGGATAACAGCG +CAATCCTATTCCAGAGTCCATATCGACAATTAGGGTTTACGACCTCGATG +TTGGATCAAGACATCCTAATGGTGCAACCGCTATTAAGGGTTCGTTTGTT +CAACGATTAAAGTCTTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGT +CGGTTTCTATCTATTCTATACTTTTCCCAGTACGAAAGGACAAGAAAAGT +AGGGCCCACTTTACAAGAAGCGCCCTCAAACTAATAGATGACATAATCTA +AATCTAACTAATTTATAACTTCTACCGCCCTAGAACAGGGCTCgttaggg +tggcagagcccggaaattgcataaaacttaaacctttacactcagaggtt +caactcctctccctaacaacaTGTTCATAATTAACGTCCTCCTCCTAATT +GTCCCAATCTTGCTCGCCGTAGCATTCCTCACACTAGTTGAACGAAAAGT +CTTAGGCTATATGCAACTTCGCAAAGGACCCAACATCGTAGGCCCCTATG +GCCTACTACAACCTATTGCCGATGCCCTCAAACTATTTATCAAAGAGCCA +CTACAACCACTAACATCATCGACATCCATATTCATCATCGCACCAATCCT +AGCCCTAACCCTGGCCTTAACCATATGAATCCCTCTGCCCATACCATACC +CACTAATCAACATAAACCTAGGAATTCTATTCATACTAGCCATGTCCAGC +CTAGCTGTCTACTCAATCCTTTGATCAGGATGGGCCTCAAACTCAAAATA +CGCCCTAATTGGAGCTCTACGAGCAGTAGCACAAACCATCTCATACGAAG +TAACTCTAGCAATCATCCTACTCTCAGTCCTCCTAATAAGCGGATCATTC +ACATTATCAACACTTATTATTACCCAAGAATACCTCTGATTAATCTTCCC +ATCATGACCCTTAGCCATAATGTGATTCATCTCAACATTAGCCGAAACCA +ACCGAGCTCCATTTGACCTAACAGAAGGAGAATCAGAACTCGTCTCTGGA +TTCAACGTTGAATACGCAGCCGGCCCATTTGCTCTATTCTTCCTAGCAGA +ATACGCAAACATCATCATGATAAACATCTTCACAACAACCCTATTTCTAG +GAGCATTTCACAACCCCTACCTGCCAGAACTCTACTCAATTAATTTCACC +ATTAAAGCTCTCCTTCTAACATGTTCCTTCCTATGAATCCGAGCATCCTA +CCCACGATTCCGATATGACCAACTTATACACCTCCTATGAAAGAACTTCC 
+TACCACTCACACTAGCCCTCTGCATATGACACGTCTCACTTCCAATCATA +CTATCCAGCATCCCACCACAAACATAGGAAATATGTCTGACAAAAGAGTT +ACTTTGATAGAGTAAAACATAGAGGCTCAAACCCTCTTATTTctagaact +acaggaattgaacctgctcctgagaattcaaaatcctccgtgctaccgaa +ttacaccatgtcctaCAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCAT +ACCCCGAAAATGTTGGATTACACCCTTCCCGTACTAATAAATCCCCTTAT +CTTCACAACTATTCTAATAACAGTTCTTCTAGGAACTATAATCGTTATAA +TAAGCTCACACTGACTAATAATCTGAATCGGATTTGAAATAAATCTACTA +GCCATTATCCCTATCCTAATAAAAAAGTACAATCCCCGAACCATAGAAGC +CTCCACCAAATATTTTCTAACCCAAGCCACCGCATCAATACTCCTCATAA +TAGCGATCATCATTAACCTCATACACTCAGGCCAATGAACAATCACAAAA +GTCTTCAACCCCACAGCGTCCATCATTATAACTTCAGCTCTCGCCATAAA +ACTTGGACTCACACCATTCCACTTCTGAGTACCCGAAGTCACACAGGGCA +TCTCATTAACATCAGGTCTCATCCTACTTACATGACAAAAACTAGCCCCA +ATATCAATCCTATATCAAATCTCACCCTCAATTAACCTAAATATCTTATT +AACTATAGCCGTACTGTCAATCCTAGTAGGAGGCTGAGGCGGTCTCAACC +AAACCCAACTACGAAAAATCATAGCATACTCGTCAATCGCGCATATAGGA +TGAATAACAGCTGTCCTAGTATATAACCCAACACTAACAATACTAAACAT +ATTAATTTACATTATAATAACACTCACAATATTCATACTATTTATCCACA +GCTCCTCTACTACAACACTATCACTCTCCCACACATGAAACAAAATACCT +CTAACCACTACACTAATCTTAATTACCTTACTATCCATAGGAGGCCTCCC +CCCACTATCAGGATTCATACCCAAATGAATAATCATTCAAGAGCTCACCA +AAAATAGCAGCATCATCCTCCCCACACTAATAGCCATTATAGCACTACTC +AACCTCTACTTCTACATACGACTAACCTATTCCACCTCACTGACCATATT +CCCATCCACAAACAACATAAAAATAAAATGACAATTCGAAACCAAACGAA +TTACTCTCTTACCCCCGTTAATTGTTATATCCTCCCTACTCCTCCCCCTA +ACCCCCATACTATCAATTTTGGACTAGGAATTTAGGTTAACATCCCAGAC +CAAGAGCCTTCAAAGCTCTAAGCAAGTGAATCCACTTAATTCCTGCATAC +TAAGGACTGCGAGACTCTATCTCACATCAATTGAACGCAAATCAAACTCT +TTTATTAAGCTAAGCCCTTACTAGATTGGTGGGCTACCATCCCACGAAAT +TTTAGTTAACAGCTAAATACCCTAATCAACTGGCTTCAATCTACTTCTCC +CGCCGCCTAGAAAAAAAGGCGGGAGAAGCCCCGGCAGAAATTGAAGCTGC +TCCTTTGAATTTGCAATTCAATGTGAAAATTCACCACGGGACTTGATAAG +AAGAGGATTCCAACCCCTGTCTTTAGATTTACAGTCTAATGCTTACTCAG +CCATCTTACCTATGTTCATCAACCGCTGACTATTTTCAACTAACCACAAA +GACATCGGCACTCTGTACCTCCTATTCGGCGCTTGAGCTGGAATAGTAGG +AACTGCCCTAAGCCTCCTAATCCGTGCTGAATTAGGCCAACCTGGGACCC +TACTAGGAGATGATCAGATCTACAATGTCATTGTAACCGCCCATGCATTC 
+GTAATAATTTTCTTTATGGTCATACCCATTATAATCGGAGGATTCGGAAA +CTGATTAGTCCCCCTGATAATTGGAGCACCTGATATAGCTTTCCCCCGAA +TAAACAACATAAGCTTCTGATTACTTCCCCCATCATTCCTACTTCTTCTC +GCTTCCTCAATAATTGAAGCAGGTGCCGGAACAGGCTGAACCGTATATCC +TCCTCTAGCTGGAAATCTGGCGCATGCAGGAGCCTCTGTTGACTTAACCA +TTTTCTCTCTCCACCTAGCTGGGGTGTCCTCGATTTTAGGTGCCATCAAC +TTTATTACCACAATCATTAACATAAAACCACCAGCCCTATCCCAATATCA +AACCCCCCTATTCGTTTGATCTGTCCTTATTACGGCAGTACTCCTTCTCC +TAGCCCTCCCGGTCCTAGCAGCAGGCATTACCATGCTTCTCACAGACCGT +AACCTGAACACTACTTTCTTCGACCCCGCAGGAGGAGGGGATCCAATCCT +TTATCAACACCTATTCTGATTCTTCGGACACCCCGAAGTCTATATTCTTA +TCCTACCAGGCTTCGGTATAATCTCACACATCGTCACATACTACTCAGGT +AAAAAGGAACCTTTTGGCTACATGGGTATAGTGTGAGCTATAATATCCAT +TGGCTTTCTAGGCTTCATCGTATGGGCTCACCACATGTTTACAGTAGGGA +TAGACGTTGACACACGAGCATACTTCACATCAGCTACCATAATCATCGCT +ATCCCTACTGGTGTAAAAGTATTCAGCTGACTAGCCACCCTGCACGGAGG +AAATATCAAATGATCTCCAGCTATACTCTGAGCTCTAGGCTTCATCTTCT +TATTCACAGTAGGAGGTCTAACAGGAATCGTCCTAGCTAACTCATCCCTA +GATATTGTTCTCCACGATACTTATTATGTAGTAGCACATTTCCATTATGT +CCTGTCTATAGGAGCAGTCTTCGCCATTATGGGGGGATTTGTACACTGAT +TCCCTCTATTCTCAGGATACACACTCAACCAAACCTGAGCAAAAATCCAC +TTTACAATTATATTCGTAGGGGTAAATATAACCTTCTTCCCACAACATTT +CCTTGGCCTCTCAGGAATGCCACGACGCTATTCTGATTATCCAGACGCAT +ATACAACATGAAATACCATCTCATCCATAGGATCTTTTATCTCACTTACA +GCAGTGATACTAATAATTTTCATAATTTGAGAAGCGTTCGCATCCAAACG +AGAAGTGTCTACAGTAGAATTAACCTCAACTAATCTGGAATGACTACACG +GATGCCCCCCACCATACCACACATTTGAAGAACCCACCTACGTAAACCTA +AAAtaagaaaggaaggaatcgaaccccctctaactggtttcaagccaata +tcataaccactatgtctttctcCATCAATTGAGGTATTAGTAAAAATTAC +ATGACTTTGTCAAAGTTAAATTATAGGTTAAACCCCTATATACCTCTATG +GCCTACCCCTTCCAACTAGGATTCCAAGACGCAACATCCCCTATTATAGA +AGAACTCCTACACTTCCACGACCACACACTAATAATCGTATTCCTAATTA +GCTCTCTAGTATTATATATTATCTCATCAATACTAACAACTAAATTAACC +CATACCAGCACCATAGATGCTCAAGAAGTAGAGACAATTTGAACGATTTT +ACCAGCCATCATCCTTATTCTAATCGCCCTCCCATCCCTACGAATTCTAT +ATATAATAGATGAAATCAATAATCCGTCCCTCACAGTCAAAACAATAGGC +CACCAATGATACTGAAGCTACGAGTATACCGATTACGAAGACTTGACCTT +TGACTCCTACATGATCCCCACATCAGACCTAAAACCAGGAGAATTACGTC 
+TTCTAGAAGTCGACAATCGAGTGGTTCTCCCCATAGAAATAACCATCCGA +ATGCTAATTTCATCCGAAGACGTCCTACACTCATGAGCTGTGCCCTCCCT +AGGCCTAAAAACAGACGCTATCCCTGGGCGCCTAAATCAGACAACTCTCG +TGGCCTCTCGACCAGGACTTTACTACGGTCAATGCTCAGAGATCTGCGGA +TCAAACCACAGCTTTATACCAATTGTCCTTGAACTAGTTCCACTGAAACA +CTTCGAAGAATGATCTGCATCAATATTATAAAGTCACTAAGAAGCTATTA +TAGCATTAACCTTTTAAGTTAAAGATTGAGGGTTCAACCCCCTCCCTAGT +GATATGCCACAGTTGGATACATCAACATGATTTATTAATATCGTCTCAAT +AATCCTAACTCTATTTATTGTATTTCAACTAAAAATCTCAAAGCACTCCT +ATCCGACACACCCAGAAGTAAAGACAACCAAAATAACAAAACACTCTGCC +CCTTGAGAATCAAAATGAACGAAAATCTATTCGCCTCTTTCGCTACCCCA +ACAATAGTAGGCCTCCCTATTGTAATTCTGATCATCATATTTCCCAGCAT +CCTATTCCCCTCACCCAACCGACTAATCAACAATCGCCTAATCTCAATTC +AACAATGGCTAGTCCAACTTACATCAAAACAAATAATAGCTATCCATAAC +AGCAAAGGACAAACCTGAACTCTTATACTCATATCACTGATCCTATTCAT +TGGCTCAACAAACTTATTAGGCCTACTACCTCACTCATTTACACCAACAA +CACAACTATCAATAAACCTAGGCATAGCTATTCCCCTATGGGCAGGGACA +GTATTCATAGGCTTTCGTCACAAAACAAAAGCAGCCCTAGCCCACTTTCT +ACCTCAAGGGACGCCCATTTTCCTCATCCCCATACTAGTAATTATCGAGA +CTATCAGCCTATTTATTCAACCTGTAGCCCTAGCCGTGCGGCTAACCGCT +AACATTACCGCCGGACACCTCCTAATACACCTCATCGGAGGGGCAACACT +AGCCCTCATAAGCATCAGCCCCTCAACAGCCCTTATTACGTTTATCATCC +TAATTCTACTAACTATCCTCGAATTCGCAGTAGCTATAATCCAAGCCTAC +GTATTCACTCTCCTGGTAAGCCTTTACTTACACGACAACACCTAATGACC +CACCAAACCCACGCTTACCACATAGTAAACCCCAGCCCATGACCACTTAC +AGGAGCCCTATCAGCCCTCCTGATAACATCAGGACTAGCCATGTGATTTC +ACTTTAACTCAACCTTACTTCTAGCTATAGGGCTATTAACTAACATCCTT +ACCATATATCAATGATGACGAGACATCATCCGAGAAAGCACATTCCAAGG +CCATCACACATCAATCGTTCAAAAGGGACTCCGATATGGCATAATCCTTT +TTATTATCTCAGAAGTCTTCTTCTTCTCTGGCTTCTTCTGAGCCTTTTAC +CACTCAAGCCTAGCCCCCACACCCGAACTAGGCGGCTGCTGACCACCCAC +AGGTATCCACCCCTTAAACCCCCTAGAAGTCCCCTTACTCAACACCTCAG +TGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCACCATAGCCTAATA +GAAGGAAACCGTAAAAATATGCTCCAAGGCCTATTCATCACAATTTCACT +AGGCGTATACTTCACCCTTCTCCAAGCCTCAGAATACTATGAAGCCTCAT +TTACTATTTCAGATGGAGTATACGGATCAACATTTTTCGTAGCAACAGGG +TTCCACGGACTACACGTAATTATCGGATCTACCTTCCTCATTGTATGTTT +CCTACGCCAACTAAAATTCCACTTTACATCCAGCCACCACTTCGGATTCG 
+AAGCAGCCGCTTGATACTGACACTTCGTCGACGTAGTCTGACTATTCTTG +TACGTCTCTATTTATTGATGAGGATCCTATTCTTTTAGTATTGACCAGTA +CAATTGACTTCCAATCAATCAGCTTCGGTATAACCCGAAAAAGAATAATA +AACCTCATACTGACACTCCTCACTAACACATTACTAGCCTCGCTACTCGT +ACTCATCGCATTCTGACTACCACAACTAAACATCTATGCAGAAAAAACCA +GCCCATATGAATGCGGATTTGACCCTATAGGGTCAGCACGCCTCCCCTTC +TCAATAAAATTTTTCTTAGTGGCCATTACATTTCTGCTATTCGACTTAGA +AATTGCCCTCCTATTACCCCTTCCATGAGCATCCCAAACAACTAACCTAA +ACACTATACTTATCATAGCACTAGTCCTAATCTCTCTTCTAGCCATCAGC +CTAGCCTACGAATGAACCCAAAAAGGACTAGAATGAACTGAGTATGGTAA +TTAGTTTAAACCAAAACAAATGATTTCGACTCATTAAACTATGATTAACT +TCATAATTACCAACATGTCACTAGTCCATATTAATATCTTCCTAGCATTC +ACAGTATCCCTCGTAGGCCTACTAATGTACCGATCCCACCTAATATCCTC +ACTCCTATGCCTAGAAGGAATAATACTATCACTATTCGTCATAGCAACCA +TAATAGTCCTAAACACCCACTTCACACTAGCTAGTATAATACCTATCATC +TTACTAGTATTTGCTGCCTGCGAACGAGCTCTAGGATTATCCCTACTAGT +CATAGTCTCCAATACTTATGGAGTAGACCACGTACAAAACCTTAACCTCC +TCCAATGCTAAAAATTATCATTCCCACAATCATACTTATGCCCCTTACAT +GACTATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATAGTCTA +TTAATCAGCCTTATCAGCCTATCCCTCCTAAACCAACCTAGCAACAATAG +CCTAAACTTCTCACTAATATTCTTCTCCGATCCCCTATCAGCCCCACTTC +TGGTGTTGACAACATGACTACTGCCACTAATACTCATAGCCAGCCAACAC +CATCTATCTAAGGAACCACTAATCCGAAAAAAACTCTACATCACCATGCT +AACCATACTTCAAACTTTCCTAATCATGACTTTTACCGCCACAGAACTAA +TCTCCTTCTACATCCTATTTGAAGCCACATTAGTTCCAACACTAATTATC +ATCACCCGCTGAGGCAACCAAACAGAACGCCTGAACGCAGGCCTCTACTT +CCTATTCTACACACTAATAGGTTCCCTCCCACTCTTAGTTGCACTAATCT +CTATCCAAAACCTAACAGGCTCACTAAACTTCCTATTAATTCAATACTGA +AACCAAGCACTACCCGACTCTTGATCCAATATTTTCCTATGACTAGCATG +TATAATAGCATTCATAGTCAAAATACCGGTATATGGTCTTCACCTCTGAC +TCCCAAAAGCCCATGTAGAAGCCCCAATTGCCGGATCCATAGTGCTAGCA +GCCATTCTACTAAAACTAGGAGGCTACGGAATACTACGAATTACAACAAT +ACTAAACCCCCAAACTAGCTTTATAGCCTACCCCTTCCTCATACTATCCC +TGTGAGGAATAATCATAACTAGTTCCATCTGCTTGCGACAAACCGATCTA +AAATCACTTATTGCATACTCCTCTGTCAGCCACATAGCCCTAGTAATCGT +AGCCGTCCTCATCCAAACACCATGAAGTTATATAGGAGCTACAGCCCTAA +TAATCGCTCACGGCCTTACATCATCAATACTATTCTGCCTGGCAAACTCA +AATTACGAACGTACCCATAGCCGAACTATAATCCTAGCCCGCGGGCTTCA 
+AACACTTCTTCCCCTTATAGCAGCCTGATGACTATTAGCCAGCCTAACCA +ACCTGGCCCTCCCTCCCAGCATTAACCTAATTGGAGAGCTATTCGTAGTA +ATATCATCATTCTCATGATCAAATATTACCATTATCCTAATAGGAGCCAA +TATCACCATCACCGCCCTCTACTCCCTATACATACTAATCACAACACAAC +GAGGGAAATACACACACCATATCAACAGCATTAAACCTTCATTTACACGA +GAAAACGCACTCATGGCCCTCCACATGACTCCCCTACTACTCCTATCACT +TAACCCTAAAATTATCCTAGGCTTTACGTACTGTAAATATAGTTTAACAA +AAACACTAGATTGTGGATCTAGAAACAGAAACTTAATATTTCTTATTTAC +CGAGAAAGTATGCAAGAACTGCTAATTCATGCCCCCATGTCCAACAAACA +TGGCTCTCTCAAACTTTTAAAGGATAGGAGCTATCCGTTGGTCTTAGGAA +CCAAAAAATTGGTGCAACTCCAAATAAAAGTAATCAACATGTTCTCCTCC +CTCATACTAGTTTCACTATTAGTACTAACCCTCCCAATCATATTATCAAT +CTTCAATACCTACAAAAACAGCACGTTCCCGCATCATGTAAAAAACACTA +TCTCATATGCCTTCATTACTAGCCTAATTCCCACTATAATATTTATTCAC +TCTGGACAAGAAACAATTATCTCAAACTGACACTGAATAACCATACAAAC +CCTCAAACTATCCCTAAGCTTCAAACTAGATTACTTCTCAATAATTTTCG +TACCAGTAGCCCTATTCGTAACATGATCTATTATGGAATTCTCCCTATGA +TACATGCACTCAGATCCTTACATTACTCGATTTTTTAAATACTTACTTAC +ATTCCTCATCACTATAATAATTCTAGTCACAGCTAACAACCTTTTCCAAC +TGTTCATCGGATGGGAGGGAGTAGGCATCATGTCATTCTTACTAATCGGA +TGATGATACGGCCGAACAGATGCCAACACCGCGGCCCTTCAAGCAATCCT +TTATAACCGCATCGGGGATATCGGCTTCATCATGGCCATAGCCTGATTCC +TATTCAACACCAACACATGAGACCTCCAACAAATCTTCATACTCGACCCC +AACCTTACCAACCTCCCGCTCCTAGGCCTCCTCCTAGCCGCAACTGGCAA +ATCCGCTCAATTTGGACTCCACCCATGACTTCCTTCAGCCATAGAGGGCC +CTACACCAGTCTCAGCCCTACTCCACTCCAGCACAATAGTTGTAGCAGGC +GTCTTCCTGCTAATCCGCTTCCATCCACTAATAGAAAACAACAAAACAAT +CCAGTCACTTACCCTATGCCTAGGAGCCATCACCACACTATTCACAGCAA +TCTGCGCACTCACTCAAAACGATATCAAAAAAATCATTGCTTTCTCCACC +TCCAGCCAACTAGGCCTGATAATCGTAACCATCGGTATCAATCAACCCTA +CCTAGCATTCCTCCACATTTGCACTCACGCATTCTTCAAAGCTATACTAT +TTATATGTTCCGGATCCATTATCCACAGCCTAAATGACGAGCAAGATATC +CGAAAAATAGGCGGACTATTTAATGCAATACCCTTCACCACCACATCTCT +AATTATTGGCAGCCTTGCACTCACCGGAATTCCTTTCCTCACAGGCTTCT +ACTCCAAAGACCTCATCATCGAAACCGCCAACACATCGTACACCAACGCC +TGAGCCCTACTAATAACTCTCATTGCCACATCCCTCACAGCTGTCTACAG +TACCCGAATCATCTTCTTTGCACTCCTAGGGCAACCCCGCTTCCTCCCTC +TGACCTCAATCAACGAAAATAACCCCTTTCTAATTAACTCCATCAAACGC 
+CTCTTAATTGGCAGCATTTTTGCCGGATTCTTCATCTCCAACAATATCTA +CCCCACAACCGTCCCAGAAATAACCATACCTACTTACATAAAACTCACCG +CCCTCGCAGTAACCATCCTAGGATTTACACTAGCCCTAGAACTAAGCTTG +ATAACCCATAACTTAAAACTAGAACACTCCACCAACGTATTCAAATTCTC +CAACCTCCTAGGATACTACCCAACAATTATACACCGACTCCCACCGCTCG +CTAACCTATCAATAAGCCAAAAATCAGCATCACTTCTACTAGACTCAATC +TGACTAGAAAACATCCTGCCAAAATCTATCTCCCAGTTCCAAATAAAAAC +CTCGATCCTAATTTCCACCCAAAAAGGACAAATCAAATTATATTTCCTCT +CATTCCTCATCACCCTTACCCTAAGCATACTACTTTTTAATCTCCACGAG +TAACCTCTAAAATTACCAAGACCCCAACAAGCAACGATCAACCAGTCACA +ATCACAACCCAAGCCCCATAACTATACAATGCAGCAGCCCCTATAATTTC +CTCACTAAACGCCCCAGAATCTCCAGTATCATAAATAGCTCAAGCCCCCA +CACCACTAAACTTAAACACTACCCCCACTTCCTCACTCTTCAGAACATAT +AAAACCAACATAACCTCCATCAACAACCCTAAAAGAAATACCCCCATAAC +AGTCGTATTAGACACCCATACCTCAGGATACTGCTCAGTAGCCATAGCCG +TTGTATAACCAAAAACAACCAACATTCCTCCCAAATAAATCAAAAACACC +ATCAACCCCAAAAAGGACCCTCCAAAATTCATAATAATACCACAACCTAC +CCCTCCACTTACAATCAGCACTAAACCCCCATAAATAGGTGAAGGTTTTG +AAGAAAACCCCACAAAACTAACAACAAAAATAACACTCAAAATAAACACA +ATATATGTCATCATTATTCCCACGTGGAATCTAACCACGACCAATGACAT +GAAAAATCATCGTTGTATTTCAACTATAAGAACACCAATGACAAACATCC +GGAAATCTCACCCACTAATTAAAATCATCAATCACTCTTTTATTGACCTA +CCAGCCCCCTCAAACATTTCATCATGATGAAACTTCGGCTCCCTCCTAGG +AATCTGCCTAATCCTCCAAATCTTAACAGGCCTATTCCTAGCCATACACT +ACACATCAGACACGACAACTGCCTTCTCATCCGTCACTCACATCTGCCGA +GACGTTAACTACGGATGAATTATTCGCTACCTCCATGCCAACGGAGCATC +AATATTTTTTATCTGCCTCTTCATTCACGTAGGACGCGGCCTCTACTACG +GCTCTTACACATTCCTAGAGACATGAAACATTGGAATCATCCTACTTTTC +ACAGTTATAGCTACAGCATTCATGGGCTATGTCCTACCATGAGGCCAAAT +ATCCTTTTGAGGAGCAACAGTCATCACGAACCTCCTATCAGCAATTCCCT +ACATCGGTACTACCCTCGTCGAGTGAATCTGAGGTGGATTCTCAGTAGAC +AAAGCCACCCTTACCCGATTTTTTGCTTTCCACTTCATCCTACCCTTCAT +CATCACAGCCCTGGTAGTCGTACATTTACTATTTCTTCACGAAACAGGAT +CTAATAACCCCTCAGGAATCCCATCCGATATGGACAAAATCCCATTCCAC +CCATATTATACAATTAAAGACATCCTAGGACTCCTCCTCCTGATCTTGCT +CCTACTAACTCTAGTATTATTCTCCCCCGACCTCCTAGGAGACCCAGACA +ACTACACCCCAGCTAACCCTCTCAGCACTCCCCCTCATATTAAACCAGAA +TGGTACTTCCTGTTTGCCTACGCCATCCTACGCTCCATTCCCAACAAACT 
+AGGCGGCGTATTAGCCCTAATCCTCTCCATCCTGATCCTAGCACTCATCC +CCACCCTCCACATATCAAAACAACGAAGCATAATATTCCGGCCTCTCAGC +CAATGCGTATTCTGACTCTTAGTGGCAGACTTACTGACACTAACATGAAT +CGGCGGACAGCCAGTGGAACACCCATACGTAATTATCGGCCAACTGGCCT +CAATCCTCTACTTCTCCCTAATTCTCATTTTTATACCACTCGCAAGCACC +ATCGAAAACAATCTTCTAAAATGAAGAGTCCCTGTAGTATATCGCACATT +ACCCTGGTCTTGTAAACCAGAAAAGGGGGAAAACGTTTCCTCCCAAGGAC +TATCAAGGAAGAAGCTCTAGCTCCACCATCAACACCCAAAGCTGAAATTC +TACTTAAACTATTCCTTGATTTCTTCCCCTAAACGACAACAATTTACCCT +CATGTGCTATGTCAGTATCAGATTATACCCCCACATAACACCATACCCAC +CTGACATGCAATATCTTATGAATGGCCTATGTACGTCGTGCATTAAATTG +TCTGCCCCATGAATAATAAGCATGTACATAATATCATTTATCTTACATAA +GTACATTATATTATTGATCGTGCATACCCCATCCAAGTCAAATCATTTCC +AGTCAACACGCATATCACAGCCCATGTTCCACGAGCTTAATCACCAAGCC +GCGGGAAATCAGCAACCCTCCCAACTACGTGTCCCAATCCTCGCTCCGGG +CCCATCCAAACGTGGGGGTTTCTACAATGAAACTATACCTGGCATCTGGT +TCTTTCTTCAGGGCCATTCCCACCCAACCTCGCCCATTCTTTCCCCTTAA +ATAAGACATCTCGATGGACTAATGACTAATCAGCCCATGCTCACACATAA +CTGTGATTTCATGCATTTGGTATCTTTTTATATTTGGGGATGCTATGACT +CAGCTATGGCCGTCAAAGGCCTCGACGCAGTCAATTAAATTGAAGCTGGA +CTTAAATTGAACGTTATTCCTCCGCATCAGCAACCATAAGGTGTTATTCA +GTCCATGGTAGCGGGACATAGGAAACAAgtgcacctgtgcacctgtgcac +ctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacct +gtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgt +gcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgc +acctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcac +ctgtgcacctACCCGCGCAGTAAGCAAGTAATATAGCTTTCTTAATCAAA +CCCCCCCTACCCCCCATTAAACTCCACATATGTACATTCAACACAATCTT +GCCAAACCCCAAAAACAAGACTAAACAATGCACAATACTTCATGAAGCTT +AACCCTCGCATGCCAACCATAATAACTCAACACACCTAACAATCTTAACA +GAACTTTCCCCCCGCCATTAATACCAACATGCTACTTTAATCAATAAAAT +TTCCATAGACAGGCATCCCCCTAGATCTAATTTTCTAAATCTGTCAACCC +TTCTTCCCCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/bowtie_indices_color.loc.sample Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,37 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie color-space indexed sequence data files. You will +#need to create these data files and then create a bowtie_indices_color.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie_indices_color.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg18 indexes stored in +#/depot/data2/galaxy/bowtie/hg18/, +#then the bowtie_indices_color.loc entry would look like this: +# +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18 +# +#and your /depot/data2/galaxy/bowtie/hg18/ directory +#would contain hg18.*.ebwt files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt +#...etc... +# +#Your bowtie_indices_color.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie/hg19/hg19 +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of indexes in the Bowtie color-space mapper format --> + <table name="bowtie_indexes_color" comment_char="#"> <!-- comment_char is the same '#' that prefixes every commentary line of the shipped .loc sample --> + <columns>value, dbkey, name, path</columns> <!-- presumably maps, in order, to the unique_build_id, dbkey, display_name and file_base_path fields described in the .loc sample; TODO confirm against the consumer of this table --> + <file path="tool-data/bowtie_indices_color.loc" /> <!-- the file shipped with this changeset is tool-data/bowtie_indices_color.loc.sample; this path has no .sample suffix, so the real .loc file has to be created by the administrator --> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Nov 26 09:47:13 2012 -0500 @@ -0,0 +1,30 @@ +<?xml version="1.0"?> +<tool_dependency> <!-- Install recipe for the single external package this repository depends on --> + <package name="bowtie" version="0.12.7"> <!-- name and version match the package requirement (bowtie, 0.12.7) declared in bowtie_color_wrapper.xml --> + <install version="1.0"> + <actions> <!-- Steps as listed: fetch the source archive, build it with make, move the three built binaries into $INSTALL_DIR/bin, then prepend that directory to PATH; presumably executed in document order, confirm against the installer --> + <action type="download_by_url">http://downloads.sourceforge.net/project/bowtie-bio/bowtie/0.12.7/bowtie-0.12.7-src.zip</action> + <action type="shell_command">make</action> + <action type="move_file"> + <source>bowtie</source> + <destination>$INSTALL_DIR/bin</destination> + </action> + <action type="move_file"> + <source>bowtie-build</source> + <destination>$INSTALL_DIR/bin</destination> + </action> + <action type="move_file"> + <source>bowtie-inspect</source> + <destination>$INSTALL_DIR/bin</destination> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> <!-- bowtie_wrapper.py runs the aligner as the bare command name bowtie through a shell, so the binary has to be resolvable via PATH --> + </actions> + </install> + <readme> +Compiling Bowtie requires libpthread to be present on your system. + </readme> + </package> +</tool_dependency> +