Mercurial > repos > iuc > bbtools_bbmap
changeset 4:6d44c9f1a42b draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bbtools commit 1800b13dcea155e96558f6b80989cf719071e300"
author | iuc |
---|---|
date | Thu, 03 Feb 2022 21:13:25 +0000 |
parents | 8157a81f511c |
children | 81cbb6a4c1ca |
files | bbmap.xml macros.xml test-data/output1.bam test-data/output2.bam test-data/output3.bam |
diffstat | 5 files changed, 257 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/bbmap.xml Tue Feb 01 23:01:08 2022 +0000 +++ b/bbmap.xml Thu Feb 03 21:13:25 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="bbtools_bbmap" name="BBTools: BBMap" version="@WRAPPER_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> +<tool id="bbtools_bbmap" name="BBTools: BBMap" version="@WRAPPER_VERSION@+galaxy3" profile="@PROFILE@"> <description>short-read aligner</description> <macros> <import>macros.xml</import> @@ -44,59 +44,274 @@ ln -s '${read2}' '${read2_file}' && #end if -bbmap.sh t=\${GALAXY_SLOTS:-4} ref='${ref}' +bbmap.sh + +#### Indexing Parameters +nodisk=f +ref='${ref}' +k=13 +usemodulo=f +rebuild=f + +#### Input Parameters #if str($input_type_cond.input_type) == 'single': in='${read1_file}' #else: - in1='${read1_file}' in2='${read2_file}' + in='${read1_file}' in2='${read2_file}' #end if -#if str($output_sort) == 'coordinate': - out='mapped.bam'; samtools sort -@\${GALAXY_SLOTS:-4} -T "\${TMPDIR:-.}" -O bam -o '$output' 'mapped.bam' -#elif str($output_sort) == 'name': - out='mapped.bam'; samtools sort -n -@\${GALAXY_SLOTS:-4} -T '\${TMPDIR:-.}' -O bam -o '$output' 'mapped.bam' +fastareadlen=500 +unpigz=f +touppercase=t + +#### Sampling Parameters +reads=-1 +samplerate=1 +skipreads=0 + +#### Mapping Parameters +maxindel=$mapping_options.maxindel +strictmaxindel='$mapping_options.strictmaxindel' +tipsearch=$mapping_options.tipsearch +minid=$mapping_options.minid +minhits=$mapping_options.minhits +local='$mapping_options.local' +perfectmode='$mapping_options.perfectmode' +semiperfectmode='$mapping_options.semiperfectmode' +threads=\${GALAXY_SLOTS:-4} +ambiguous='$mapping_options.ambiguous' +samestrandpairs='$mapping_options.samestrandpairs' +requirecorrectstrand='$mapping_options.requirecorrectstrand' +killbadpairs='$mapping_options.killbadpairs' +pairedonly='$mapping_options.pairedonly' +rcomp='$mapping_options.rcomp' +rcompmate='$mapping_options.rcompmate' +pairlen=$mapping_options.pairlen +rescuedist=$mapping_options.rescuedist +rescuemismatches=$mapping_options.rescuemismatches +averagepairdist=$mapping_options.averagepairdist +deterministic='$mapping_options.deterministic' +bandwidthratio='$mapping_options.bandwidthratio' +bandwidth='$mapping_options.bandwidth' +usejni='f' +maxsites2=$mapping_options.maxsites2 +ignorefrequentkmers='$mapping_options.ignorefrequentkmers' +excludefraction=$mapping_options.excludefraction +greedy='$mapping_options.greedy' +kfilter=$mapping_options.kfilter + +#### Quality and Trimming Parameters +qin='auto' +qout='auto' +qtrim='$qt_options.qtrim' +untrim='$qt_options.untrim' +trimq=$qt_options.trimq +mintrimlength=$qt_options.mintrimlength +fakefastaquality=$qt_options.fakefastaquality +ignorebadquality='$qt_options.ignorebadquality' +usequality='$qt_options.usequality' +minaveragequality=$qt_options.minaveragequality +maqb=$qt_options.maqb + +#### Post-Filtering options +idfilter=$pf_options.idfilter +subfilter=$pf_options.subfilter +insfilter=$pf_options.insfilter +delfilter=$pf_options.delfilter +indelfilter=$pf_options.indelfilter +editfilter=$pf_options.editfilter +inslenfilter=$pf_options.inslenfilter +dellenfilter=$pf_options.dellenfilter +nfilter=$pf_options.nfilter + +#### Output parameters +secondary='$output_options.secondary' +maxsites=$output_options.maxsites +sssr=$output_options.sssr +ssao='$output_options.ssao' +quickmatch='$output_options.quickmatch' +trimreaddescriptions='$output_options.trimreaddescriptions' +machineout='$output_options.machineout' +printunmappedcount='$output_options.printunmappedcount' +renamebyinsert='$output_options.renamebyinsert' +#if str($output_options.output_sort) == 'coordinate': + out='all_reads.bam' + outu='unmapped_reads.bam' + outm='mapped_reads.bam' + && samtools sort -@\${GALAXY_SLOTS:-4} -T "\${TMPDIR:-.}" -O bam -o '$output_all_reads' 'all_reads.bam' + && samtools sort -@\${GALAXY_SLOTS:-4} -T "\${TMPDIR:-.}" -O bam -o '$output_unmapped_reads' 'unmapped_reads.bam' + && samtools sort -@\${GALAXY_SLOTS:-4} -T "\${TMPDIR:-.}" -O bam -o '$output_mapped_reads' 'mapped_reads.bam' +#elif str($output_options.output_sort) == 'name': + out='all_reads.bam' + outu='unmapped_reads.bam' + outm='mapped_reads.bam' + && samtools sort -n -@\${GALAXY_SLOTS:-4} -T '\${TMPDIR:-.}' -O bam -o '$output_all_reads' 'all_reads.bam' + && samtools sort -n -@\${GALAXY_SLOTS:-4} -T '\${TMPDIR:-.}' -O bam -o '$output_unmapped_reads' 'unmapped_reads.bam' + && samtools sort -n -@\${GALAXY_SLOTS:-4} -T '\${TMPDIR:-.}' -O bam -o '$output_mapped_reads' 'mapped_reads.bam' #else: - out='mapped.bam' && mv 'mapped.bam' '$output' + out='all_reads.bam' + outu='unmapped_reads.bam' + outm='mapped_reads.bam' + && mv 'all_reads.bam' '$output_all_reads' + && mv 'unmapped_reads.bam' '$output_unmapped_reads' + && mv 'mapped_reads.bam' '$output_mapped_reads' #end if ]]></command> <inputs> <expand macro="input_type_cond"/> <expand macro="reference_source_cond"/> - <param name="output_sort" type="select" label="BAM sorting mode" help="The 'Not sorted' option can significantly extend the run time of the tool (it runs using a single thread)."> - <option value="coordinate" selected="True">Sort by chromosomal coordinates</option> - <option value="name">Sort by read names</option> - <option value="unsorted">Not sorted (sorted as input)</option> - </param> + <section name="mapping_options" title="Mapping options"> + <param argument="maxindel" type="integer" value="16000" label="Maximum indel length" help="Don't look for indels longer than this (lower is faster)"/> + <param argument="strictmaxindel" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Do not allow indels longer than the specified maximum indel length?" help="By default these are not sought, but may be found anyway"/> + <param argument="tipsearch" type="integer" value="100" label="Look this far for read-end deletions with anchors shorter than K using brute force"/> + <param argument="minid" type="float" value="0.76" label="Approximate minimum alignment identity to look for" help="Higher is faster and less sensitive"/> + <param argument="minhits" type="integer" value="1" label="Minimum number of seed hits required for candidate sites" help="Higher is faster"/> + <param argument="local" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Use local (rather than global) alignments?"/> + <param argument="perfectmode" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Allow only perfect mappings?"/> + <param argument="semiperfectmode" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Allow only perfect and semiperfect (perfect except for N's in the reference) mappings?"/> + <param name="ambiguous" type="select" label="Specify behavior on ambiguously-mapped reads with multiple top-scoring mapping locations"> + <option value="best" selected="true">Use the first best site</option> + <option value="toss">Consider unmapped</option> + <option value="random">Select one top-scoring site randomly</option> + <option value="all">Retain all top-scoring sites</option> + </param> + <param argument="samestrandpairs" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Map paired reads to the same strand?" help="No value will map to opposite strands"/> + <param argument="requirecorrectstrand" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Forbid pairing of reads without correct strand orientation?" help="Select No for long-mate-pair libraries"/> + <param argument="killbadpairs" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Mark the read with the lower mapping quality as unmapped if a read pair is mapped with an inappropriate insert size or orientation?"/> + <param argument="pairedonly" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Treat unpaired reads as unmapped?"/> + <param argument="rcomp" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Reverse complement both reads prior to mapping (for LMP outward-facing libraries)?"/> + <param argument="rcompmate" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Reverse complement read2 prior to mapping?"/> + <param argument="pairlen" type="integer" value="32000" label="Maximum allowed distance between paired reads"/> + <param argument="rescuedist" type="integer" value="1200" label="Maximum average insert size for trying to rescue paired reads" help="Lower is faster"/> + <param argument="rescuemismatches" type="integer" value="32" label="Maximum mismatches allowed in a rescued read" help="Lower is faster"/> + <param argument="averagepairdist" type="integer" value="100" label="Initial average distance between paired reads" help="Varies dynamically; does not need to be specified"/> + <param argument="deterministic" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Run in deterministic mode?" help="BBMap is deterministic without this flag if using single-ended reads?"/> + <param argument="bandwidthratio" type="integer" value="0" label="Restrict alignment band to this fraction of read length" help="Zero value ignores"/> + <param argument="bandwidth" type="integer" value="0" label="Set the bandwidth directly to this fraction of read length" help="Faster but less accurate"/> + <param argument="maxsites2" type="integer" value="800" label="Maximum number of alignments per read to output"/> + <param argument="ignorefrequentkmers" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Discard low-information kmers that occur often?"/> + <param argument="excludefraction" type="float" value="0.03" label="Fraction of kmers to ignore" help="For example, 0.03 will ignore the most common 3% of kmers"/> + <param argument="greedy" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Use a greedy algorithm to discard the least-useful kmers on a per-read basis?"/> + <param argument="kfilter" type="integer" value="0" label="Potential mapping sites must have at least this many consecutive exact matches" help="Zero value ignores"/> + </section> + <section name="qt_options" title="Quality and trimming options"> + <param name="qtrim" type="select" label="Select option for quality trimming ends before mapping"> + <option value="f" selected="true">No trimming</option> + <option value="l">Trim left</option> + <option value="r">Trim right</option> + <option value="lr">Trim both</option> + </param> + <param argument="untrim" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Undo trimming after mapping?" help="Untrimmed bases will be soft-clipped in cigar strings"/> + <param argument="trimq" type="integer" value="6" label="Trim regions with average quality below this value"/> + <param argument="mintrimlength" type="integer" value="60" label="Don't trim reads to be shorter than this value"/> + <param argument="fakefastaquality" type="integer" value="-1" max="50" label="Set to a positive number 1-50 to generate fake quality strings for fasta input reads"/> + <param argument="ignorebadquality" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Keep going, rather than crashing, if a read has out-of-range quality values?"/> + <param argument="usequality" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Use quality scores when determining which read kmers to use as seeds?"/> + <param argument="minaveragequality" type="integer" value="0" label="Do not map reads with average quality below this value"/> + <param argument="maqb" type="integer" value="0" label="Calculate maq from this many initial bases" help="Zero value ignores"/> + </section> + <section name="output_options" title="Output options"> + <param argument="secondary" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Output secondary alignments?"/> + <param argument="maxsites" type="integer" value="5" label="Maximum number of total alignments to output per read" help="Relevant only when outputting secondary alignments"/> + <param argument="sssr" type="float" value="0.95" label="Output only secondary alignments with score of at least this fraction of primary"/> + <param argument="ssao" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Only output secondary alignments for ambiguously-mapped reads?"/> + <param argument="quickmatch" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Generate cigar strings more quickly?"/> + <param argument="trimreaddescriptions" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Truncate read and ref names at the first whitespace, assuming that the remainder is a comment or description?"/> + <param argument="machineout" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Output statistics in machine-friendly (key=value) format?"/> + <param argument="printunmappedcount" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Output the total number of unmapped reads and bases?" help="If input is paired, the number will be of pairs for which both reads are unmapped"/> + <param argument="renamebyinsert" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Rename reads based on their mapped insert size?"/> + <param name="output_sort" type="select" label="BAM sorting mode" help="The 'Not sorted' option can significantly extend the run time of the tool (it runs using a single thread)"> + <option value="coordinate" selected="True">Sort by chromosomal coordinates</option> + <option value="name">Sort by read names</option> + <option value="unsorted">Not sorted (sorted as input)</option> + </param> + </section> + <section name="pf_options" title="Post-filtering options"> + <param argument="idfilter" type="integer" value="0" min="0" max="1" label="Specify exact minimum identity allowed for alignments to be output" help="Independent of approximate minimum alignment identity to look for"/> + <param argument="subfilter" type="integer" value="-1" label="Ban alignments with more than this many substitutions" help="Negative value ignores"/> + <param argument="insfilter" type="integer" value="-1" label="Ban alignments with more than this many insertions" help="Negative value ignores"/> + <param argument="delfilter" type="integer" value="-1" label="Ban alignments with more than this many deletions" help="Negative value ignores"/> + <param argument="indelfilter" type="integer" value="-1" label="Ban alignments with more than this many indels" help="Negative value ignores"/> + <param argument="editfilter" type="integer" value="-1" label="Ban alignments with more than this many edits" help="Negative value ignores"/> + <param argument="inslenfilter" type="integer" value="-1" label="Ban alignments with an insertion longer than this" help="Negative value ignores"/> + <param argument="dellenfilter" type="integer" value="-1" label="Ban alignments with a deletion longer than this" help="Negative value ignores"/> + <param argument="nfilter" type="integer" value="-1" label="Ban alignments with more than this many ns (includes nocall, noref, and off scaffold ends)" help="Negative value ignores"/> + </section> </inputs> <outputs> - <data format="bam" name="output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> + <data format="bam" name="output_all_reads" label="${tool.name} on ${on_string} (all reads)"> + <expand macro="dbKeyActionsBBMap"/> + <change_format> + <when input="output_options.output_sort" value="name" format="qname_sorted.bam"/> + <when input="output_options.output_sort" value="unsorted" format="qname_input_sorted.bam"/> + </change_format> + </data> + <data format="bam" name="output_unmapped_reads" label="${tool.name} on ${on_string} (unmapped reads)"> <expand macro="dbKeyActionsBBMap"/> <change_format> - <when input="output_sort" value="name" format="qname_sorted.bam" /> - <when input="output_sort" value="unsorted" format="qname_input_sorted.bam" /> + <when input="output_options.output_sort" value="name" format="qname_sorted.bam"/> + <when input="output_options.output_sort" value="unsorted" format="qname_input_sorted.bam"/> + </change_format> + </data> + <data format="bam" name="output_mapped_reads" label="${tool.name} on ${on_string} (mapped reads)"> + <expand macro="dbKeyActionsBBMap"/> + <change_format> + <when input="output_options.output_sort" value="name" format="qname_sorted.bam"/> + <when input="output_options.output_sort" value="unsorted" format="qname_input_sorted.bam"/> </change_format> </data> </outputs> <tests> <!-- Single file, cached reference, output coordinate sorted --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <param name="input_type" value="single"/> <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/> - <output name="output" file="output1.bam" ftype="bam" lines_diff="4"> - <metadata name="dbkey" value="89" /> + <param name="output_sort" value="coordinate"/> + <output name="output_all_reads" ftype="bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="9433" delta="300"/> + </assert_contents> + </output> + <output name="output_unmapped_reads" ftype="bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="9432" delta="300"/> + </assert_contents> + </output> + <output name="output_mapped_reads" ftype="bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="938" delta="100"/> + </assert_contents> </output> </test> <!-- Paired reads in separate datasets, cached reference, output name sorted --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <param name="input_type" value="pair"/> <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/> <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz"/> <param name="output_sort" value="name"/> - <output name="output" file="output2.bam" ftype="qname_sorted.bam" lines_diff="4"> - <metadata name="dbkey" value="89" /> + <output name="output_all_reads" ftype="qname_sorted.bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="17103" delta="600"/> + </assert_contents> + </output> + <output name="output_unmapped_reads" ftype="qname_sorted.bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="17105" delta="600"/> + </assert_contents> + </output> + <output name="output_mapped_reads" ftype="qname_sorted.bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="967" delta="100"/> + </assert_contents> </output> </test> <!-- Collection of Paired reads, history reference, output unsorted --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <param name="input_type" value="paired"/> <param name="reads_collection"> <collection type="paired"> @@ -107,8 +322,23 @@ <param name="ref_source" value="history"/> <param name="reference" value="NC_002945v4.fasta" dbkey="89" ftype="fasta"/> <param name="output_sort" value="unsorted"/> - <output name="output" file="output3.bam" ftype="qname_input_sorted.bam" lines_diff="4"> - <metadata name="dbkey" value="89" /> + <output name="output_all_reads" ftype="qname_input_sorted.bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="17059" delta="600"/> + </assert_contents> + </output> + <output name="output_unmapped_reads" ftype="qname_input_sorted.bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="17059" delta="600"/> + </assert_contents> + </output> + <output name="output_mapped_reads" ftype="qname_input_sorted.bam"> + <metadata name="dbkey" value="89"/> + <assert_contents> + <has_size value="906" delta="100"/> + </assert_contents> </output> </test> </tests>
--- a/macros.xml Tue Feb 01 23:01:08 2022 +0000 +++ b/macros.xml Thu Feb 03 21:13:25 2022 +0000 @@ -1,10 +1,10 @@ <macros> <token name="@WRAPPER_VERSION@">1.0.0</token> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> <token name="@PROFILE@">20.09</token> <xml name="requirements"> <requirements> - <requirement type="package" version="38.92">bbmap</requirement> + <requirement type="package" version="38.93">bbmap</requirement> </requirements> </xml> <macro name="dbKeyActionsBBMap">