Mercurial > repos > iuc > disco
view disco.xml @ 0:6d447ebb2bf4 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/disco/ commit c1327b5793477da611684e92e0ce4635370c7666
author | iuc |
---|---|
date | Fri, 03 Nov 2017 15:44:23 -0400 |
parents | |
children | dfefaf2bab95 |
line wrap: on
line source
<tool id="disco" name="DISCO" version="@WRAPPER_VERSION@.0">
    <!--
        Galaxy wrapper around runDisco.sh.
        The command passes the selected read files and three generated
        parameter files (one per assembly iteration) to the script, then
        collects contigs, scaffolds and the optional log, unused-read and
        GFA/GFA2 outputs.
    -->
    <description>to assemble metagenomics data using an overlap-layout-consensus (OLC) approach</description>
    <macros>
        <token name="@WRAPPER_VERSION@">1.2</token>

        <!-- Tuning parameters shared by the three "assembly iteration" sections in <inputs>. -->
        <xml name="assembly_params">
            <param argument="minSequenceLengthTobePrinted" type="integer" value="1000" min="0" label="Minimum length of contigs or scaffolds to be printed"/>
            <param argument="MinOverlap4SimplifyGraph" type="integer" value="30" min="0" label="Minimum overlap length (bp) required to keep an edge between two reads during graph simplification" help="This minimum overlap length must be equal to or larger than the MinOverlap4BuildGraph above. This allows you to try different minimum overlap lengths for assembly without redoing assembly graph construction. Edges with shorter overlap length than this parameter will be ignored during graph simplification. Increase this to reduce N50 and mis-assemblies"/>
            <param argument="minOverlapDifference4ClipBranches" type="integer" value="25" min="0" label="Minimum overlap length difference (bp) to clip branches" help="If a read has multiple edges, Disco clips the branches with overlap lengths less than the largest overlap of this read by this difference or more. Increase this to reduce N50 and mis-assemblies"/>
            <param argument="minUniquePEsupport" type="integer" value="3" min="0" label="Minimum number of paired-end reads that provide unique support to merge two edges" help="Increase this to reduce N50 and mis-assemblies"/>
            <param argument="minNonUniquePEsupport" type="integer" value="0" min="0" label="Minimum number of paired-end reads that provide non-unique support to merge two edges"/>
            <param argument="minReadsCountInEdgeToBeNotDeadEnd" type="integer" value="10" min="0" label="Minimum number of reads in an edge to be not dead-end edge"/>
            <param argument="minEdgeLengthToBeNotDeadEnd" type="integer" value="1000" min="0" label="Minimum edge length (bp) to be not dead-end edge"/>
            <param argument="minFoldToBeShortBranch" type="integer" value="5" min="0" label="Minimum fold difference between two branches' lengths to consider a branch to be short"/>
            <param argument="minReadsCountToHave0Flow" type="integer" value="15" min="0" label="Minimum number of reads for an edge to be kept even if it has 0 flow"/>
            <param argument="minEdgeLengthToHave0Flow" type="integer" value="1500" min="0" label="Minimum edge length for an edge to be kept even if it has 0 flow"/>
            <param argument="minReadsCountInEdgeToBe1MinFlow" type="integer" value="20" min="0" label="Minimum number of reads in an edge to be assigned with 1 minimum flow"/>
            <param argument="minEdgeLengthToBe1MinFlow" type="integer" value="2000" min="0" label="Minimum edge length to be assigned with 1 minimum flow"/>
        </xml>

        <!-- Test-side values for one iteration section; every test uses the same numbers for all three iterations. -->
        <xml name="default_assembly_test_params">
            <param name="minSequenceLengthTobePrinted" value="1000"/>
            <param name="MinOverlap4SimplifyGraph" value="30"/>
            <param name="minOverlapDifference4ClipBranches" value="25"/>
            <param name="minUniquePEsupport" value="3"/>
            <param name="minNonUniquePEsupport" value="0"/>
            <param name="minReadsCountInEdgeToBeNotDeadEnd" value="10"/>
            <param name="minEdgeLengthToBeNotDeadEnd" value="1000"/>
            <param name="minFoldToBeShortBranch" value="5"/>
            <param name="minReadsCountToHave0Flow" value="15"/>
            <param name="minEdgeLengthToHave0Flow" value="1500"/>
            <param name="minReadsCountInEdgeToBe1MinFlow" value="20"/>
            <param name="minEdgeLengthToBe1MinFlow" value="2000"/>
        </xml>
    </macros>
    <requirements>
        <requirement type="package" version="@WRAPPER_VERSION@">disco</requirement>
    </requirements>
    <version_command>runDisco.sh --version</version_command>
    <!-- Cheetah directives must each sit on their own line; the input branch picks exactly one of -inS, -in1/-in2 or -inP. -->
    <command detect_errors="exit_code"><![CDATA[
runDisco.sh
    -n \${GALAXY_SLOTS:-4}
    -m \${DISCO_MAX_MEMORY:-4}
    -d .
    -o disco
    #if $input.library_type == "single"
        -inS '$input.inS'
    #else
        #if $input.paired_input.type == "separated"
            -in1 '$input.paired_input.in1'
            -in2 '$input.paired_input.in2'
        #else
            -inP '$input.paired_input.inP'
        #end if
    #end if
    $obg
    $osg
    -p '$first_assembly_iteration_params'
    -p2 '$second_assembly_iteration_params'
    -p3 '$third_assembly_iteration_params'
    ]]></command>
    <!--
        One parameter file per assembly iteration, handed to runDisco.sh through
        the p, p2 and p3 options above. Entries are kept flush-left, one per
        line, so no indentation ends up in the rendered files.
    -->
    <configfiles>
        <configfile name="first_assembly_iteration_params">
<![CDATA[
MinOverlap4BuildGraph = $MinOverlap4BuildGraph
PrintContigs = true
PrintScaffolds = true
PrintUnused = $PrintUnused
PrintGFA = $PrintGFA
PrintGFA2 = $PrintGFA2
minSequenceLengthTobePrinted = $first_assembly_iter_param.minSequenceLengthTobePrinted
MinOverlap4SimplifyGraph = $first_assembly_iter_param.MinOverlap4SimplifyGraph
minOverlapDifference4ClipBranches = $first_assembly_iter_param.minOverlapDifference4ClipBranches
minUniquePEsupport = $first_assembly_iter_param.minUniquePEsupport
minNonUniquePEsupport = $first_assembly_iter_param.minNonUniquePEsupport
minReadsCountInEdgeToBeNotDeadEnd = $first_assembly_iter_param.minReadsCountInEdgeToBeNotDeadEnd
minEdgeLengthToBeNotDeadEnd = $first_assembly_iter_param.minEdgeLengthToBeNotDeadEnd
minFoldToBeShortBranch = $first_assembly_iter_param.minFoldToBeShortBranch
minReadsCountToHave0Flow = $first_assembly_iter_param.minReadsCountToHave0Flow
minEdgeLengthToHave0Flow = $first_assembly_iter_param.minEdgeLengthToHave0Flow
minReadsCountInEdgeToBe1MinFlow = $first_assembly_iter_param.minReadsCountInEdgeToBe1MinFlow
minEdgeLengthToBe1MinFlow = $first_assembly_iter_param.minEdgeLengthToBe1MinFlow
]]></configfile>
        <configfile name="second_assembly_iteration_params">
<![CDATA[
MinOverlap4BuildGraph = $MinOverlap4BuildGraph
PrintContigs = true
PrintScaffolds = true
PrintUnused = $PrintUnused
PrintGFA = $PrintGFA
PrintGFA2 = $PrintGFA2
minSequenceLengthTobePrinted = $second_assembly_iter_param.minSequenceLengthTobePrinted
MinOverlap4SimplifyGraph = $second_assembly_iter_param.MinOverlap4SimplifyGraph
minOverlapDifference4ClipBranches = $second_assembly_iter_param.minOverlapDifference4ClipBranches
minUniquePEsupport = $second_assembly_iter_param.minUniquePEsupport
minNonUniquePEsupport = $second_assembly_iter_param.minNonUniquePEsupport
minReadsCountInEdgeToBeNotDeadEnd = $second_assembly_iter_param.minReadsCountInEdgeToBeNotDeadEnd
minEdgeLengthToBeNotDeadEnd = $second_assembly_iter_param.minEdgeLengthToBeNotDeadEnd
minFoldToBeShortBranch = $second_assembly_iter_param.minFoldToBeShortBranch
minReadsCountToHave0Flow = $second_assembly_iter_param.minReadsCountToHave0Flow
minEdgeLengthToHave0Flow = $second_assembly_iter_param.minEdgeLengthToHave0Flow
minReadsCountInEdgeToBe1MinFlow = $second_assembly_iter_param.minReadsCountInEdgeToBe1MinFlow
minEdgeLengthToBe1MinFlow = $second_assembly_iter_param.minEdgeLengthToBe1MinFlow
]]></configfile>
        <configfile name="third_assembly_iteration_params">
<![CDATA[
MinOverlap4BuildGraph = $MinOverlap4BuildGraph
PrintContigs = true
PrintScaffolds = true
PrintUnused = $PrintUnused
PrintGFA = $PrintGFA
PrintGFA2 = $PrintGFA2
minSequenceLengthTobePrinted = $third_assembly_iter_param.minSequenceLengthTobePrinted
MinOverlap4SimplifyGraph = $third_assembly_iter_param.MinOverlap4SimplifyGraph
minOverlapDifference4ClipBranches = $third_assembly_iter_param.minOverlapDifference4ClipBranches
minUniquePEsupport = $third_assembly_iter_param.minUniquePEsupport
minNonUniquePEsupport = $third_assembly_iter_param.minNonUniquePEsupport
minReadsCountInEdgeToBeNotDeadEnd = $third_assembly_iter_param.minReadsCountInEdgeToBeNotDeadEnd
minEdgeLengthToBeNotDeadEnd = $third_assembly_iter_param.minEdgeLengthToBeNotDeadEnd
minFoldToBeShortBranch = $third_assembly_iter_param.minFoldToBeShortBranch
minReadsCountToHave0Flow = $third_assembly_iter_param.minReadsCountToHave0Flow
minEdgeLengthToHave0Flow = $third_assembly_iter_param.minEdgeLengthToHave0Flow
minReadsCountInEdgeToBe1MinFlow = $third_assembly_iter_param.minReadsCountInEdgeToBe1MinFlow
minEdgeLengthToBe1MinFlow = $third_assembly_iter_param.minEdgeLengthToBe1MinFlow
]]></configfile>
    </configfiles>
    <inputs>
        <!-- Read input: single-end, or paired-end given as separate or interleaved files. -->
        <conditional name="input">
            <param name="library_type" type="select" label="Type of library?">
                <option value="single">Single-end reads</option>
                <option value="paired">Paired-end reads</option>
            </param>
            <when value="single">
                <param argument="-inS" type="data" format="fastq,fasta" multiple="true" label="Single-read files"/>
            </when>
            <when value="paired">
                <conditional name="paired_input">
                    <param name="type" type="select" label="Type of library?">
                        <option value="separated">Separated forward/reverse paired-read files</option>
                        <option value="interleaved">Interleaved paired-read files</option>
                    </param>
                    <when value="separated">
                        <param argument="-in1" type="data" format="fastq,fasta" label="Forward read file" />
                        <param argument="-in2" type="data" format="fastq,fasta" label="Reverse read file" />
                    </when>
                    <when value="interleaved">
                        <param argument="-inP" type="data" format="fastq,fasta" multiple="true" label="Interleaved paired-read files" />
                    </when>
                </conditional>
            </when>
        </conditional>
        <param argument="-obg" type="boolean" truevalue="-obg" falsevalue="" checked="false" label="Only build overlap graph?"/>
        <param argument="-osg" type="boolean" truevalue="-osg" falsevalue="" checked="false" label="Only simplify existing overlap graph?"/>
        <param argument="MinOverlap4BuildGraph" type="integer" min="0" value="20" label="Minimum overlap length (bp) required to insert an edge between two reads during graph construction" help="Increase this to reduce N50 and mis-assemblies"/>
        <!-- Only controls whether the "log" output dataset is kept (see its filter in <outputs>). -->
        <param name="log" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output log file?"/>
        <!--
            NOTE(review): the generated parameter files hard-code
            "PrintScaffolds = true" and the scaffolds output has no filter, so
            this switch is not consulted anywhere in this wrapper. Confirm
            whether it should be wired in or removed.
        -->
        <param argument="PrintScaffolds" type="boolean" truevalue="true" falsevalue="false" label="Print scaffolds?"/>
        <param argument="PrintUnused" type="boolean" truevalue="true" falsevalue="false" label="Print unused reads?"/>
        <param argument="PrintGFA" type="boolean" truevalue="true" falsevalue="false" label="Print GFA graph format?"/>
        <param argument="PrintGFA2" type="boolean" truevalue="true" falsevalue="false" label="Print GFA2 graph format?"/>
        <section name="first_assembly_iter_param" title="Parameters for 1st assembly iteration" expanded="false">
            <expand macro="assembly_params"/>
        </section>
        <section name="second_assembly_iter_param" title="Parameters for 2nd assembly iteration" expanded="false">
            <expand macro="assembly_params"/>
        </section>
        <section name="third_assembly_iter_param" title="Parameters for 3rd assembly iteration" expanded="false">
            <expand macro="assembly_params"/>
        </section>
    </inputs>
    <outputs>
        <data format="txt" name="log" from_work_dir="disco.log" label="${tool.name} on ${on_string}: Log file">
            <filter>log</filter>
        </data>
        <data format="fasta" name="contigs" from_work_dir="disco_contigsFinalCombined.fasta" label="${tool.name} on ${on_string}: Contigs"/>
        <data format="fasta" name="scaffolds" from_work_dir="disco_scaffoldsFinalCombined.fasta" label="${tool.name} on ${on_string}: Scaffolds"/>
        <!-- The named group in each pattern is written with &lt; / &gt; because a raw "<" is not allowed inside an attribute value. -->
        <collection name="unused_reads" type="list" label="${tool.name} on ${on_string}: Unused reads">
            <filter>PrintUnused</filter>
            <discover_datasets pattern="disco_0_Unused(?P&lt;designation&gt;.+)\.fasta" format="fasta" directory="assembly" />
        </collection>
        <collection name="gfa_graphs" type="list" label="${tool.name} on ${on_string}: GFA graph">
            <filter>PrintGFA</filter>
            <discover_datasets pattern="disco_Graph_(?P&lt;designation&gt;.+)\.gfa" format="txt" directory="assembly" />
        </collection>
        <collection name="gfa2_graphs" type="list" label="${tool.name} on ${on_string}: GFA2 graph">
            <filter>PrintGFA2</filter>
            <discover_datasets pattern="disco_Graph_(?P&lt;designation&gt;.+)\.gfa2" format="txt" directory="assembly" />
        </collection>
    </outputs>
    <tests>
        <!-- Test 1: separated paired-end reads with every optional output switched on. -->
        <test>
            <conditional name="input">
                <param name="library_type" value="paired"/>
                <conditional name="paired_input">
                    <param name="type" value="separated"/>
                    <param name="in1" value="ecoli_1K_1.fq"/>
                    <param name="in2" value="ecoli_1K_2.fq"/>
                </conditional>
            </conditional>
            <param name="obg" value=""/>
            <param name="osg" value=""/>
            <param name="MinOverlap4BuildGraph" value="20"/>
            <param name="log" value="true"/>
            <param name="PrintUnused" value="true"/>
            <param name="PrintGFA" value="true"/>
            <param name="PrintGFA2" value="true"/>
            <section name="first_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <section name="second_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <section name="third_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <output name="log">
                <assert_contents>
                    <has_text text="Software: Disco Assembler" />
                    <has_text text="FIRST LOOP ITERATION 1" />
                    <has_text text="2054 reads streamed from this read file" />
                    <has_text text="Graph simplification" />
                </assert_contents>
            </output>
            <output name="contigs" value="test1_contigs.fasta" compare="sim_size" delta="5"/>
            <output name="scaffolds" value="test1_scaffolds.fasta" compare="sim_size" delta="5"/>
            <output_collection name="unused_reads" type="list">
                <element name="PairedReads1">
                    <assert_contents>
                        <has_text text="EAS20_8_6_1_9_1972/1" />
                        <has_text text="EAS20_8_6_10_415_709/1" />
                        <has_text text="EAS20_8_6_36_1124_470/1" />
                        <has_text text="EAS20_8_6_100_1637_1332/1" />
                    </assert_contents>
                </element>
                <element name="PairedReads2">
                    <assert_contents>
                        <has_text text="EAS20_8_6_1_9_1972/2" />
                        <has_text text="EAS20_8_6_10_415_709/2" />
                        <has_text text="EAS20_8_6_36_1124_470/2" />
                        <has_text text="EAS20_8_6_100_1637_1332/2" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="gfa_graphs" type="list">
                <element name="1" file="test1_graph_1.gfa" compare="sim_size" delta="6000"/>
            </output_collection>
            <output_collection name="gfa2_graphs" type="list">
                <element name="1" file="test1_graph_1.gfa2" compare="sim_size" delta="6000"/>
            </output_collection>
        </test>
        <!-- Test 2: two interleaved paired-end files, log only. -->
        <test>
            <conditional name="input">
                <param name="library_type" value="paired"/>
                <conditional name="paired_input">
                    <param name="type" value="interleaved"/>
                    <param name="inP" value="ecoli_1K_interleaved.fq,ecoli_1K_interleaved.fq"/>
                </conditional>
            </conditional>
            <param name="obg" value=""/>
            <param name="osg" value=""/>
            <param name="MinOverlap4BuildGraph" value="20"/>
            <param name="log" value="true"/>
            <param name="PrintUnused" value="false"/>
            <param name="PrintGFA" value="false"/>
            <param name="PrintGFA2" value="false"/>
            <section name="first_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <section name="second_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <section name="third_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <output name="log">
                <assert_contents>
                    <has_text text="Software: Disco Assembler" />
                    <has_text text="FIRST LOOP ITERATION 1" />
                    <has_text text="4108 reads streamed from this read file" />
                    <has_text text="Graph simplification" />
                </assert_contents>
            </output>
            <output name="contigs" value="test2_contigs.fasta" compare="sim_size" delta="5"/>
            <output name="scaffolds" value="test2_scaffolds.fasta" compare="sim_size" delta="5"/>
        </test>
        <!-- Test 3: single-end reads with no optional outputs. -->
        <test>
            <conditional name="input">
                <param name="library_type" value="single"/>
                <param name="inS" value="ecoli_1K_1.fq"/>
            </conditional>
            <param name="obg" value=""/>
            <param name="osg" value=""/>
            <param name="MinOverlap4BuildGraph" value="20"/>
            <param name="log" value="false"/>
            <param name="PrintUnused" value="false"/>
            <param name="PrintGFA" value="false"/>
            <param name="PrintGFA2" value="false"/>
            <section name="first_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <section name="second_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <section name="third_assembly_iter_param">
                <expand macro="default_assembly_test_params"/>
            </section>
            <output name="contigs" value="test3_contigs.fasta" compare="sim_size" delta="5"/>
            <output name="scaffolds" value="test3_scaffolds.fasta" compare="sim_size" delta="5"/>
        </test>
    </tests>
    <help><![CDATA[
DISCO is a multi-threaded and multiprocess distributed memory overlap-layout-consensus (OLC) metagenome assembler.
Disco was developed as a scalable assembler to assemble large metagenomes from billions of Illumina sequencing
reads of complex microbial communities. Disco was parallelized for computer clusters in a hybrid architecture
that integrated shared-memory multi-threading, point-to-point message passing, and remote direct memory access.
The assembly and scaffolding were performed using an iterative overlap graph approach. The detailed user manual
of the assembler and how to use it to achieve best results is provided here: http://disco.omicsbio.org/user-manual.
This is a quick start guide generally for developers and testers. Users with limited experience with genome
assembly are advised to use the user manual.
    ]]></help>
    <citations>
    </citations>
</tool>