Mercurial > repos > iuc > mothur_cluster_split
diff cluster.split.xml @ 2:3c24b99497db draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit 3418f23b9768f5aafb86488f5ec1cb97530d4fb3
author | iuc |
---|---|
date | Tue, 20 Mar 2018 22:16:50 -0400 |
parents | e70a33ec8f3b |
children | 2c02989afecb |
line wrap: on
line diff
--- a/cluster.split.xml Tue Sep 05 17:13:33 2017 -0400 +++ b/cluster.split.xml Tue Mar 20 22:16:50 2018 -0400 @@ -1,135 +1,170 @@ <tool profile="16.07" id="mothur_cluster_split" name="Cluster.split" version="@WRAPPER_VERSION@.0"> - <description>Assign sequences to OTUs (Operational Taxonomic Unit) splits large matrices</description> + <description>Assign sequences to OTUs and split large matrices</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements"/> + <expand macro="requirements"> + <requirement type="package" version="2.6.0">vsearch</requirement> + </expand> <expand macro="stdio"/> <expand macro="version_command"/> <command><![CDATA[ - @SHELL_OPTIONS@ +@SHELL_OPTIONS@ - ## create symlinks to input datasets - #if $splitby.splitmethod == "distance": - ln -s "$splitby.matrix.dist" splitby.matrix.dist.dat && - ln -s "$splitby.matrix.nameOrCount" splitby.matrix.nameOrCount.dat && - #elif $splitby.splitmethod == "classify": - ln -s "$splitby.dist" splitby.dist.dat && - ln -s "$splitby.nameOrCount" splitby.nameOrCount.dat && - ln -s "$splitby.taxonomy" splitby.taxonomy.dat && - #elif $splitby.splitmethod == "fasta": - ln -s "$splitby.fasta" splitby.fasta.dat && - ln -s "$splitby.nameOrCount" splitby.nameOrCount.dat && - ln -s "$splitby.taxonomy" splitby.taxonomy.dat && - #end if +## create symlinks to input datasets +#if $splitby.splitmethod == "distance": + ln -s '$splitby.matrix.dist' splitby.matrix.dist.dat && + ln -s '$splitby.matrix.nameOrCount' splitby.matrix.nameOrCount.dat && +#elif $splitby.splitmethod == "classify": + ln -s '$splitby.dist' splitby.dist.dat && + ln -s '$splitby.nameOrCount' splitby.nameOrCount.dat && + ln -s '$splitby.taxonomy' splitby.taxonomy.dat && +#elif $splitby.splitmethod == "fasta": + ln -s '$splitby.fasta' splitby.fasta.dat && + ln -s '$splitby.nameOrCount' splitby.nameOrCount.dat && + ln -s '$splitby.taxonomy' splitby.taxonomy.dat && +#end if - echo 'cluster.split( - splitmethod=$splitby.splitmethod, - #if $splitby.splitmethod == "distance": - #if $splitby.matrix.format == "column": - column=splitby.matrix.dist.dat, - #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"): - name=splitby.matrix.nameOrCount.dat, - #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"): - count=splitby.matrix.nameOrCount.dat, - #end if - #elif $splitby.matrix.format == "phylip": - phylip=splitby.matrix.dist.dat, - #if $splitby.matrix.nameOrCount: - #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"): - name=splitby.matrix.nameOrCount.dat, - #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"): - count=splitby.matrix.nameOrCount.dat, - #end if - #end if +echo 'cluster.split( + splitmethod=$splitby.splitmethod, + #if $splitby.splitmethod == "distance": + #if $splitby.matrix.format == "column": + column=splitby.matrix.dist.dat, + #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"): + name=splitby.matrix.nameOrCount.dat, + #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"): + count=splitby.matrix.nameOrCount.dat, + #end if + #elif $splitby.matrix.format == "phylip": + phylip=splitby.matrix.dist.dat, + #if $splitby.matrix.nameOrCount: + #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"): + name=splitby.matrix.nameOrCount.dat, + #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"): + count=splitby.matrix.nameOrCount.dat, #end if - #elif $splitby.splitmethod == "classify": - column=splitby.dist.dat, - taxonomy=splitby.taxonomy.dat, - #if $splitby.nameOrCount.is_of_type("mothur.names"): - name=splitby.nameOrCount.dat, - #elif $splitby.nameOrCount.is_of_type("mothur.count_table"): - count=splitby.nameOrCount.dat, - #end if - #if $splitby.taxlevel: - taxlevel=$splitby.taxlevel, - #end if - #elif $splitby.splitmethod == "fasta": - fasta=splitby.fasta.dat, - taxonomy=splitby.taxonomy.dat, - #if $splitby.nameOrCount.is_of_type("mothur.names"): - name=splitby.nameOrCount.dat, - #elif $splitby.nameOrCount.is_of_type("mothur.count_table"): - count=splitby.nameOrCount.dat, - #end if - #if $splitby.taxlevel: - taxlevel=$splitby.taxlevel, - #end if - classic=$splitby.classic, #end if - #if $method: - method=$method, - #end if - #if float($cutoff) > 0.0: - cutoff=$cutoff, - #end if - hard=$hard, - #if $precision - precision=$precision, - #end if - large=$large, - cluster=$cluster, - processors='\${GALAXY_SLOTS:-8}' - )' - | sed 's/ //g' ## mothur trips over whitespace - | mothur - | tee mothur.out.log + #end if + #elif $splitby.splitmethod == "classify": + column=splitby.dist.dat, + taxonomy=splitby.taxonomy.dat, + #if $splitby.nameOrCount.is_of_type("mothur.names"): + name=splitby.nameOrCount.dat, + #elif $splitby.nameOrCount.is_of_type("mothur.count_table"): + count=splitby.nameOrCount.dat, + #end if + #if $splitby.taxlevel: + taxlevel=$splitby.taxlevel, + #end if + #elif $splitby.splitmethod == "fasta": + fasta=splitby.fasta.dat, + taxonomy=splitby.taxonomy.dat, + #if $splitby.nameOrCount.is_of_type("mothur.names"): + name=splitby.nameOrCount.dat, + #elif $splitby.nameOrCount.is_of_type("mothur.count_table"): + count=splitby.nameOrCount.dat, + #end if + #if $splitby.taxlevel: + taxlevel=$splitby.taxlevel, + #end if + classic=$splitby.classic, + #end if + method=$splitby.condmethod.method, + #if $splitby.condmethod.method == "opti": + metric=$splitby.condmethod.metric, + initialize=$splitby.condmethod.initialize, + delta=$splitby.condmethod.delta, + iters=$splitby.condmethod.iters, + #end if + #if float($cutoff) > 0.0: + cutoff=$cutoff, + #end if + #if $precision + precision=$precision, + #end if + large=$large, + cluster=$cluster, + runsensspec=$runsensspec, + processors='\${GALAXY_SLOTS:-8}' +)' +| sed 's/ //g' ## mothur trips over whitespace +| mothur +| tee mothur.out.log ]]></command> <inputs> <conditional name="splitby"> - <param name="splitmethod" type="select" label="Split by" help=""> + <param name="splitmethod" type="select" label="Split by" help="VSEARCH methods (agc and dgc) require a fasta file"> <option value="distance">Distance</option> <option value="classify">Classification</option> <option value="fasta">Classification using fasta</option> </param> <when value="distance"> <conditional name="matrix"> - <param name="format" type="select" label="Select a Distance Matrix Format" help=""> + <param name="format" type="select" label="Select a Distance Matrix Format"> <option value="column">Pairwise Column Matrix</option> <option value="phylip">Phylip Distance Matrix</option> </param> <when value="column"> - <param name="dist" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/> + <param name="dist" argument="column" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/> <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/> </when> <when value="phylip"> - <param name="dist" type="data" format="mothur.dist,mothur.lower.dist,mothur.square.dist" label="phylip - Distance Matrix"/> + <param name="dist" argument="phylip" type="data" format="mothur.dist,mothur.lower.dist,mothur.square.dist" label="phylip - Distance Matrix"/> <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" optional="true" label="name file or count table - Sequences Name reference"/> </when> </conditional> + <conditional name="condmethod"> + <expand macro="param-clustermethods"/> + <when value="furthest"/> + <when value="nearest"/> + <when value="average"/> + <when value="opti"> + <expand macro="params-opticlust"/> + </when> + </conditional> </when> <when value="classify"> - <param name="dist" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/> - <param name="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/> + <param name="dist" argument="column" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/> + <param argument="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/> <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/> - <param name="taxlevel" type="integer" value="1" min="1" label="taxlevel - taxonomy level for split (default=1)" help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/> + <param argument="taxlevel" type="integer" value="1" min="1" label="taxlevel - taxonomy level for split (default=1)" + help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/> + <conditional name="condmethod"> + <expand macro="param-clustermethods"/> + <when value="furthest"/> + <when value="nearest"/> + <when value="average"/> + <when value="opti"> + <expand macro="params-opticlust"/> + </when> + </conditional> </when> <when value="fasta"> - <param name="fasta" type="data" format="mothur.align,fasta" label="fasta - Aligned Sequences" help="must be aligned sequences (mothur.align)"/> - <param name="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/> + <param argument="fasta" type="data" format="mothur.align,fasta" label="Fasta"/> + <param argument="taxonomy" type="data" format="mothur.seq.taxonomy" label="Taxonomy" help="can be obtained by running classify.seqs"/> <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/> - <param name="taxlevel" type="integer" value="3" min="1" label="taxlevel - taxonomy level for split (default=3)" help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/> - <param name="classic" type="boolean" checked="false" truevalue="true" falsevalue="false" label="classic - Use cluster.classic"/> + <param argument="taxlevel" type="integer" value="3" min="1" label="taxlevel - taxonomy level for split (default=3)" + help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/> + <param argument="classic" type="boolean" checked="false" truevalue="true" falsevalue="false" label="classic - Use cluster.classic"/> + <conditional name="condmethod"> + <expand macro="param-clustermethods"> + <expand macro="option-vsearch-clustermethods"/> + </expand> + <when value="furthest"/> + <when value="nearest"/> + <when value="average"/> + <when value="agc"/> + <when value="dgc"/> + <when value="opti"> + <expand macro="params-opticlust"/> + </when> + </conditional> </when> </conditional> - <param name="method" type="select" label="method - Select a Clustering Method" help=""> - <option value="furthest">Furthest neighbor</option> - <option value="nearest">Nearest neighbor</option> - <option value="average" selected="true">Average neighbor</option> - </param> - <param name="cutoff" type="float" value="0.0" min="0.0" label="cutoff - Distance Cutoff threshold - ignored if not > 0" help="Ignore pairwise distances larger than this, a common value would be 0.25"/> - <param name="hard" type="boolean" checked="true" truevalue="true" falsevalue="false" label="hard - Use hard cutoff instead of rounding" help=""/> - <param name="precision" type="select" optional="true" label="precision - Precision for rounding distance values" help="Set higher precision for longer genome scale sequence lengths"> + <param argument="cutoff" type="float" value="0.0" min="0.0" label="cutoff - Distance Cutoff threshold - ignored if not > 0" + help="Ignore pairwise distances larger than this, a common value would be 0.25"/> + <param argument="precision" type="select" optional="true" label="precision - Precision for rounding distance values" + help="Set higher precision for longer genome scale sequence lengths"> <option value="10">.1</option> <option value="100" selected="true">.01</option> <option value="1000">.001</option> @@ -137,8 +172,12 @@ <option value="100000">.00001</option> <option value="1000000">.000001</option> </param> - <param name="large" type="boolean" checked="false" truevalue="true" falsevalue="false" label="large - distance matrix is too large to fit in RAM" help="If your job fails due to not enough memory error, set this to true to rerun"/> - <param name="cluster" type="boolean" falsevalue="false" truevalue="true" checked="true" label="The cluster parameter allows you to indicate whether you want to run the clustering or just split the distance matrix, default=T"/> + <param argument="large" type="boolean" checked="false" truevalue="true" falsevalue="false" label="large - distance matrix is too large to fit in RAM" + help="If your job fails due to not enough memory error, set this to true to rerun"/> + <param argument="cluster" type="boolean" falsevalue="false" truevalue="true" checked="true" label="perform clustering?" + help="indicate whether you want to run the clustering or just split the distance matrix"/> + <param argument="runsensspec" type="boolean" truevalue="true" falsevalue="false" checked="true" label="runsensspec" help="run the sens.spec command on the completed list file"/> + <expand macro="param-savelog"/> </inputs> <outputs> <expand macro="logfile-output"/> @@ -162,15 +201,36 @@ <data name="splitfile" format="txt" from_work_dir="splitby.*.file" label="${tool.name} on ${on_string}: split.file"> <filter>not cluster</filter> </data> + <data name="sensspec" format="txt" from_work_dir="splitby.*.sensspec" label="${tool.name} on ${on_string}: sensspec"> + <filter>runsensspec and splitby['condmethod']['method'] == "opti"</filter> + </data> </outputs> <tests> <test><!-- test with distance method --> <param name="splitmethod" value="distance"/> <param name="format" value="phylip"/> <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/> - <output name="otulist" md5="2613ef0a1805ba9de012a41e938d8947" ftype="mothur.list"/> - <output name="rabund" md5="4df813ec2d51c373a846a82380c7a1f8" ftype="mothur.rabund"/> - <output name="sabund" md5="8d6813a5e8d2ad426a0ee5fdd99f1a19" ftype="mothur.sabund"/> + <param name="method" value="average"/> + <output name="otulist" ftype="mothur.list"> + <assert_contents> + <expand macro="test-list-format"/> + <has_text text="unique"/> + <has_text text="U68680"/> + </assert_contents> + </output> + <output name="rabund" ftype="mothur.rabund"> + <assert_contents> + <has_line_matching expression="^unique(\t\d+)+$"/> + <has_text text="0.03"/> + </assert_contents> + </output> + <output name="sabund" ftype="mothur.sabund"> + <assert_contents> + <has_line_matching expression="^unique(\t\d+)+$"/> + <has_text text="0.03"/> + </assert_contents> + </output> + <param name="savelog" value="true"/> <expand macro="logfile-test"/> </test> <test><!-- test with cluster false --> @@ -178,6 +238,7 @@ <param name="format" value="phylip"/> <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/> <param name="cluster" value="false"/> + <param name="method" value="average"/> <output name="splitfile" ftype="txt"> <assert_contents> <has_text text="column"/> @@ -186,12 +247,22 @@ <has_text text="temp"/> </assert_contents> </output> - <output_collection name="splitnames" count="4"> - <element name="0" md5="27037eeb3e696888b24653d0996261cd" ftype="mothur.names"/> + <output_collection name="splitnames" count="14"> + <element name="0" ftype="mothur.names"> + <assert_contents> + <has_text text="U68591"/> + <has_text text="U68600"/> + </assert_contents> + </element> </output_collection> - <output_collection name="splitdist" count="3"> - <element name="4" md5="f751aee00b598d3b6691d34f67dbc8d5" ftype="mothur.dist"/> + <output_collection name="splitdist" count="13"> + <element name="4" ftype="mothur.dist"> + <assert_contents> + <has_line_matching expression="^U\d+\tU\d+\t\d+\.\d+$"/> + </assert_contents> + </element> </output_collection> + <param name="savelog" value="true"/> <expand macro="logfile-test"/> </test> <test><!-- test with classify method (mothur.names input file) --> @@ -200,9 +271,11 @@ <param name="dist" value="amazon.pair.dist" ftype="mothur.pair.dist"/> <param name="nameOrCount" value="amazon.names" ftype="mothur.names"/> <param name="taxonomy" value="amazon.wang.wang.taxonomy" ftype="mothur.seq.taxonomy"/> + <param name="method" value="average"/> <output name="otulist" md5="d6eba624ad79759c530b9bc3285a1361" ftype="mothur.list"/> <output name="rabund" md5="2a165e1e40644fccb8cc9f53d8915bc3" ftype="mothur.rabund"/> <output name="sabund" md5="7aad8a9ca0eade414d6eba1f8bef960f" ftype="mothur.sabund"/> + <param name="savelog" value="true"/> <expand macro="logfile-test"/> </test> <test><!-- test with classify method (mothur.count_table input file) --> @@ -211,7 +284,9 @@ <param name="dist" value="amazon.pair.dist" ftype="mothur.pair.dist"/> <param name="nameOrCount" value="amazon.count_table" ftype="mothur.count_table"/> <param name="taxonomy" value="amazon.wang.wang.taxonomy" ftype="mothur.seq.taxonomy"/> + <param name="method" value="average"/> <output name="otulist" md5="c5c28330434d3e773221f635d04d6af9" ftype="mothur.list"/> + <param name="savelog" value="true"/> <expand macro="logfile-test"/> </test> <test><!-- test with fasta --> @@ -220,14 +295,59 @@ <param name="nameOrCount" value="amazon.align_head.names" ftype="mothur.names"/> <param name="taxonomy" value="amazon.align_head.wang.taxonomy" ftype="mothur.seq.taxonomy"/> <param name="cutoff" value="9999"/> + <param name="method" value="average"/> <output name="otulist" md5="a1279248cf2bc1094e0046b2cff1b785" ftype="mothur.list"/> <output name="rabund" md5="65ec9f326cd92fc607679b9902ec8430" ftype="mothur.rabund"/> <output name="sabund" md5="854d3acd15f64299c5d9d9e18f2d51b4" ftype="mothur.sabund"/> + <param name="savelog" value="true"/> + <expand macro="logfile-test"/> + </test> + <test><!-- test with vsearch executable (agc/dgc method) --> + <param name="splitmethod" value="fasta"/> + <param name="fasta" value="amazon.align_head" ftype="mothur.align"/> + <param name="nameOrCount" value="amazon.align_head.names" ftype="mothur.names"/> + <param name="taxonomy" value="amazon.align_head.wang.taxonomy" ftype="mothur.seq.taxonomy"/> + <param name="method" value="agc"/> + <output name="otulist" md5="0fbc5bf21331538dd50b6586c4005edc" ftype="mothur.list"/> + <output name="rabund" md5="dcccca11d9fa7a186cd93e9d4592f832" ftype="mothur.rabund"/> + <output name="sabund" md5="167815924b1b2b4d4e5e7468d41256cb" ftype="mothur.sabund"/> + <param name="savelog" value="true"/> + <expand macro="logfile-test"/> + </test> + <test><!-- test with opticlust method --> + <param name="splitmethod" value="distance"/> + <param name="format" value="phylip"/> + <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/> + <param name="method" value="opti"/> + <output name="otulist" ftype="mothur.list"> + <assert_contents> + <expand macro="test-list-format"/> + <has_text text="0.03"/> + </assert_contents> + </output> + <output name="rabund" ftype="mothur.rabund"> + <assert_contents> + <expand macro="test-rabund-format"/> + <has_text text="0.03"/> + </assert_contents> + </output> + <output name="sabund" ftype="mothur.sabund"> + <assert_contents> + <expand macro="test-sabund-format"/> + <has_text text="0.03"/> + </assert_contents> + </output> + <output name="sensspec" ftype="txt"> + <assert_contents> + <expand macro="test-sensspec-format"/> + <has_text text="0.03"/> + </assert_contents> + </output> + <param name="savelog" value="true"/> <expand macro="logfile-test"/> </test> </tests> - <help> -<![CDATA[ + <help><![CDATA[ @MOTHUR_OVERVIEW@ @@ -240,7 +360,6 @@ v1.28.0: Upgraded to Mothur 1.33, introduced cluster boolean. -]]> - </help> + ]]></help> <expand macro="citations"/> </tool>