Mercurial > repos > iuc > abyss
diff abyss-pe.xml @ 0:f048033da666 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/abyss commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
author | iuc |
---|---|
date | Fri, 19 Jun 2015 09:26:16 -0400 |
parents | |
children | 0a5c7992b1ac |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abyss-pe.xml Fri Jun 19 09:26:16 2015 -0400 @@ -0,0 +1,244 @@ +<tool id="abyss-pe" name="ABySS" version="1.9.0.0"> + <description>de novo sequence assembler</description> + <macros> + <xml name="reads_conditional"> + <conditional name="reads"> + <param name="reads_selector" type="select" label="Type of paired-end datasets"> + <option value="paired">2 separate datasets</option> + <option value="paired_collection">1 paired dataset collection</option> + <option value="paired_il">1 dataset of interleaved reads</option> + </param> + <when value="paired"> + <param name="reads1" type="data" format="fasta,fastq,fastqsanger,fastqillumina" label="Paired-end reads 1" /> + <param name="reads2" type="data" format="fasta,fastq,fastqsanger,fastqillumina" label="Paired-end reads 2" /> + </when> + <when value="paired_collection"> + <param name="reads_coll" type="data_collection" collection_type="paired" format="fasta,fastq,fastqsanger,fastqillumina" label="Paired-end reads collection" /> + </when> + <when value="paired_il"> + <param name="reads_il" type="data" format="fasta,fastq,fastqsanger,fastqillumina" label="Interleaved paired-end reads" /> + </when> + </conditional> + </xml> + </macros> + <requirements> + <requirement type="package" version="1.9.0">abyss</requirement> + </requirements> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <!-- In case the return code has not been set propery check stderr too --> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> + <version_command>ABYSS --version | head -n 1</version_command> + <command> +abyss-pe name=abyss j=\${GALAXY_SLOTS:-1} k=$k +#if str($K) + K=$K +#end if +#if str($q) + q=$q +#end if +#if str($Q) + Q=$Q +#end if +#if str($e) + e=$e +#end if +#if str($E) + E=$E +#end if +#if str($t) + t=$t +#end if +#if str($c) + c=$c +#end if +#if str($b) + b=$b +#end if +#if $SS + SS=--SS +#end if +#if str($m) + m=$m +#end if +#if str($p) + p=$p +#end if +#if str($a) + a=$a +#end if +#if str($l) + l=$l +#end if +#if str($s) + s=$s +#end if +#if str($n) + n=$n +#end if +#if str($d) + d=$d +#end if +#if str($S) + S=$S +#end if +#if str($N) + N=$N +#end if +#if $lib_repeat + lib=' + #for $i in range(len($lib_repeat)) + lib$i + #end for + ' + #for $i, $v in enumerate($lib_repeat) + #if $v.reads.reads_selector == 'paired' + lib${i}='${v.reads.reads1} ${v.reads.reads2}' + #elif $v.reads.reads_selector == 'paired_collection' + lib${i}='${v.reads.reads_coll.forward} ${v.reads.reads_coll.reverse}' + #else + lib${i}='${v.reads.reads_il}' + #end if + #end for +#end if +#if str($se_reads) != 'None' + se=' + #for $v in $se_reads + $v + #end for + ' +#end if +#if $mp_repeat + mp=' + #for $i in range(len($mp_repeat)) + mp$i + #end for + ' + #for $i, $v in enumerate($mp_repeat) + #if $v.reads.reads_selector == 'paired' + mp${i}='${v.reads.reads1} ${v.reads.reads2}' + #elif $v.reads.reads_selector == 'paired_collection' + mp${i}='${v.reads.reads_coll.forward} ${v.reads.reads_coll.reverse}' + #else + mp${i}='${v.reads.reads_il}' + #end if + #end for +#end if +#if str($long_seqs) != 'None' + long=' + #for $v in $long_seqs + $v + #end for + ' +#end if + </command> + + <inputs> + <repeat name="lib_repeat" title="Paired-end library" help="(lib)"> + <expand macro="reads_conditional" /> + </repeat> + <param name="se_reads" type="data" multiple="true" optional="true" format="fasta,fastq,fastqsanger,fastqillumina" label="Single-end reads" help="(se)" /> + <repeat name="mp_repeat" title="Mate-pair library" help="The mate-pair libraries are used only for scaffolding and do not contribute towards the consensus sequence (mp)"> + <expand macro="reads_conditional" /> + </repeat> + <param name="long_seqs" type="data" multiple="true" optional="true" format="fasta,fastq,fastqsanger,fastqillumina" label="Long sequences" help="Long sequence libraries are used only for rescaffolding and do not contribute towards the consensus sequence (long)" /> + <param name="k" type="integer" value="41" label="K-mer length (in bp)" help="The length of a k-mer (when -K is not set) or the span of a k-mer pair (when K is set) (k)" /> + <param name="K" type="integer" value="" optional="true" label="The length (in bp) of a single k-mer in a k-mer pair" help="Optional (K)" /> + <param name="q" type="integer" value="3" max="40" optional="true" label="Minimum base quality when trimming" help="Trim bases from the ends of reads whose quality is less than this value (q)" /> + <param name="Q" type="integer" value="0" max="40" optional="true" label="Minimum base quality" help="Mask all bases of reads whose quality is less than this value as 'N' (Q)" /> + <param name="e" type="integer" value="" min="0" optional="true" label="Minimum erosion k-mer coverage" help="Erode bases at the ends of blunt contigs with coverage less than this value. Defaults to round(sqrt(median)) (e)" /> + <param name="E" type="integer" value="" min="0" optional="true" label="Minimum erosion k-mer coverage per strand" help="Erode bases at the ends of blunt contigs with coverage less than this value on either strand. Defaults to 1 if sqrt(median) > 2 else 0 (E)" /> + <param name="t" type="integer" value="" optional="true" label="Maximum length of blunt contigs to trim" help="Defaults to the value of k (t)" /> + <param name="c" type="float" value="" optional="true" label="Minimum mean k-mer coverage of a unitig" help="Defaults to sqrt(median) (c)" /> + <param name="b" type="integer" value="" optional="true" label="Maximum length of a bubble (in bp)" help="abyss-pe has two bubble popping stages. The default limits are 3*k bp for ABYSS and 10000 bp for PopBubbles (b)" /> + <param name="SS" type="boolean" label="Assemble in strand-specific mode" help="Requires that all libraries are strand-specific RNA-Seq libraries. Assumes that the first read in a read pair is reveresed with respect to the transcripts sequenced (SS)" /> + <param name="m" type="integer" value="50" min="0" optional="true" label="Minimum overlap of two unitigs (in bp)" help="(m)" /> + <param name="p" type="float" value="0.9" optional="true" label="Minimum sequence identity of a bubble" help="(p)" /> + <param name="a" type="integer" value="2" min="0" optional="true" label="Maximum number of branches of a bubble" help="(a)" /> + <param name="l" type="integer" value="" min="1" optional="true" label="Minimum alignment length of a read (in bp)" help="Defaults to the value of k (l)" /> + <param name="s" type="integer" value="200" min="0" optional="true" label="Minimum unitig length required for building contigs (in bp)" help="The seed length should be at least twice the value of k. If more sequence is assembled than the expected genome size, try increasing this value (s)" /> + <param name="n" type="integer" value="10" min="0" optional="true" label="Minimum number of pairs required for building contigs" help="(n)" /> + <param name="d" type="integer" value="6" min="0" optional="true" label="Allowable error of a distance estimate (in bp)" help="(d)" /> + <param name="S" type="integer" value="" min="0" optional="true" label="Minimum contig length required for building scaffolds (in bp)" help="Defaults to the value of s (S)" /> + <param name="N" type="integer" value="" min="0" optional="true" label="Minimum number of pairs required for building scaffolds" help="Defaults to the value of n (N)" /> + </inputs> + + <outputs> + <data name="unitigs" format="fasta" label="${tool.name} on ${on_string}: unitigs" from_work_dir="abyss-unitigs.fa" /> + <data name="contigs_outfile" format="fasta" label="${tool.name} on ${on_string}: contigs" from_work_dir="abyss-contigs.fa"> + <filter>lib_repeat</filter> + </data> + <data name="scaffolds" format="fasta" label="${tool.name} on ${on_string}: scaffolds" from_work_dir="abyss-scaffolds.fa"> + <filter>lib_repeat or mp_repeat</filter> + </data> + <data name="bubbles" format="fasta" label="${tool.name} on ${on_string}: bubbles" from_work_dir="abyss-bubbles.fa" /> + <data name="indels" format="fasta" label="${tool.name} on ${on_string}: indels" from_work_dir="abyss-indel.fa" /> + <data name="coverage_histogram_outfile" format="tabular" label="${tool.name} on ${on_string}: coverage histogram" from_work_dir="coverage.hist" /> + <data name="stats" format="tabular" label="${tool.name} on ${on_string}: stats" from_work_dir="abyss-stats.tab" /> + </outputs> + + <tests> + <test> + <repeat name="lib_repeat"> + <conditional name="reads"> + <param name="reads_selector" value="paired" /> + <param name="reads1" ftype="fastqsanger" value="assembly_sample-R1.fastq" /> + <param name="reads2" ftype="fastqsanger" value="assembly_sample-R2.fastq" /> + </conditional> + </repeat> + <param name="k" value="50" /> + <output name="unitigs" file="abyss-unitigs1.fa" /> + <output name="contigs_outfile" file="abyss-contigs1.fa" /> + <output name="scaffolds" file="abyss-scaffolds1.fa" /> + <output name="bubbles" file="empty_file.fasta" /> + <output name="indels" file="empty_file.fasta" /> + <output name="coverage_histogram_outfile" file="coverage1.hist" /> + <output name="stats" file="abyss-stats1.tab" /> + </test> + <test> + <param name="se_reads" ftype="fastqsanger" value="assembly_sample-R1.fastq,assembly_sample-R2.fastq" /> + <param name="k" value="50" /> + <output name="unitigs" file="abyss-unitigs2.fa" /> + <output name="bubbles" file="empty_file.fasta" /> + <output name="indels" file="empty_file.fasta" /> + <output name="coverage_histogram_outfile" file="coverage2.hist" /> + <output name="stats" file="abyss-stats2.tab" /> + </test> + </tests> + <help> +**What it does** + +`ABySS`_ is a de novo sequence assembler intended for short paired-end reads and large genomes. + +**Input** + +Input files should be FASTA or FASTQ files. + +For paired reads, a pair of reads must be named with the suffixes /1 and /2 to identify the first and second read, or the reads may be named identically. The paired reads may be in separate files or interleaved in a single file. + +At least a paired-end library or a single-end library must be provided. + +Contigs are generated only if at least a paired-end library is provided. Scaffolds are generated only if at least a paired-end library or a mate-pair library is provided. + +**Description** + +This tool performs the complete ABySS pipeline using abyss-pe, described in https://github.com/bcgsc/abyss/blob/master/doc/flowchart.pdf . + +**License and citation** + +This Galaxy tool is Copyright © 2015 `The Genome Analysis Centre (TGAC)`_ and is released under the `MIT license`_. + +.. _The Genome Analysis Centre (TGAC): http://www.tgac.ac.uk/ +.. _MIT license: http://opensource.org/licenses/MIT + +You can use this tool only if you agree to the license terms of: `ABySS`_. + +.. _ABySS: http://www.bcgsc.ca/platform/bioinfo/software/abyss + </help> + <citations> + <citation type="doi">10.1101/gr.089532.108</citation> + </citations> +</tool>