Mercurial > repos > iuc > mothur_cluster_split

diff cluster.split.xml @ 2:3c24b99497db draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit 3418f23b9768f5aafb86488f5ec1cb97530d4fb3
author: iuc
date: Tue, 20 Mar 2018 22:16:50 -0400
parents: e70a33ec8f3b
children: 2c02989afecb
--- a/cluster.split.xml	Tue Sep 05 17:13:33 2017 -0400
+++ b/cluster.split.xml	Tue Mar 20 22:16:50 2018 -0400
@@ -1,135 +1,170 @@
 <tool profile="16.07" id="mothur_cluster_split" name="Cluster.split" version="@WRAPPER_VERSION@.0">
-    <description>Assign sequences to OTUs (Operational Taxonomic Unit) splits large matrices</description>
+    <description>Assign sequences to OTUs and split large matrices</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements"/>
+    <expand macro="requirements">
+        <requirement type="package" version="2.6.0">vsearch</requirement>
+    </expand>
     <expand macro="stdio"/>
     <expand macro="version_command"/>
     <command><![CDATA[
-        @SHELL_OPTIONS@
+@SHELL_OPTIONS@
 
-        ## create symlinks to input datasets
-        #if $splitby.splitmethod == "distance":
-            ln -s "$splitby.matrix.dist" splitby.matrix.dist.dat &&
-            ln -s "$splitby.matrix.nameOrCount" splitby.matrix.nameOrCount.dat &&
-        #elif $splitby.splitmethod == "classify":
-            ln -s "$splitby.dist" splitby.dist.dat &&
-            ln -s "$splitby.nameOrCount" splitby.nameOrCount.dat &&
-            ln -s "$splitby.taxonomy" splitby.taxonomy.dat &&
-        #elif $splitby.splitmethod == "fasta":
-            ln -s "$splitby.fasta" splitby.fasta.dat &&
-            ln -s "$splitby.nameOrCount" splitby.nameOrCount.dat &&
-            ln -s "$splitby.taxonomy" splitby.taxonomy.dat &&
-        #end if
+## symlink the selected inputs to fixed splitby.* names in the working directory; the mothur call below and the from_work_dir output patterns refer to these names
+#if $splitby.splitmethod == "distance":
+    ln -s '$splitby.matrix.dist' splitby.matrix.dist.dat &&
+    ln -s '$splitby.matrix.nameOrCount' splitby.matrix.nameOrCount.dat &&
+#elif $splitby.splitmethod == "classify":
+    ln -s '$splitby.dist' splitby.dist.dat &&
+    ln -s '$splitby.nameOrCount' splitby.nameOrCount.dat &&
+    ln -s '$splitby.taxonomy' splitby.taxonomy.dat &&
+#elif $splitby.splitmethod == "fasta":
+    ln -s '$splitby.fasta' splitby.fasta.dat &&
+    ln -s '$splitby.nameOrCount' splitby.nameOrCount.dat &&
+    ln -s '$splitby.taxonomy' splitby.taxonomy.dat &&
+#end if
 
-        echo 'cluster.split(
-            splitmethod=$splitby.splitmethod,
-            #if $splitby.splitmethod == "distance":
-                #if $splitby.matrix.format == "column":
-                    column=splitby.matrix.dist.dat,
-                    #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"):
-                        name=splitby.matrix.nameOrCount.dat,
-                    #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"):
-                        count=splitby.matrix.nameOrCount.dat,
-                    #end if
-                #elif $splitby.matrix.format == "phylip":
-                    phylip=splitby.matrix.dist.dat,
-                    #if $splitby.matrix.nameOrCount:
-                        #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"):
-                            name=splitby.matrix.nameOrCount.dat,
-                        #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"):
-                            count=splitby.matrix.nameOrCount.dat,
-                        #end if
-                    #end if
+echo 'cluster.split(
+    splitmethod=$splitby.splitmethod,
+    #if $splitby.splitmethod == "distance":
+        #if $splitby.matrix.format == "column":
+            column=splitby.matrix.dist.dat,
+            #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"):
+                name=splitby.matrix.nameOrCount.dat,
+            #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"):
+                count=splitby.matrix.nameOrCount.dat,
+            #end if
+        #elif $splitby.matrix.format == "phylip":
+            phylip=splitby.matrix.dist.dat,
+            #if $splitby.matrix.nameOrCount:
+                #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"):
+                    name=splitby.matrix.nameOrCount.dat,
+                #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"):
+                    count=splitby.matrix.nameOrCount.dat,
                 #end if
-            #elif $splitby.splitmethod == "classify":
-                column=splitby.dist.dat,
-                taxonomy=splitby.taxonomy.dat,
-                #if $splitby.nameOrCount.is_of_type("mothur.names"):
-                    name=splitby.nameOrCount.dat,
-                #elif $splitby.nameOrCount.is_of_type("mothur.count_table"):
-                    count=splitby.nameOrCount.dat,
-                #end if
-                #if $splitby.taxlevel:
-                    taxlevel=$splitby.taxlevel,
-                #end if
-            #elif $splitby.splitmethod == "fasta":
-                fasta=splitby.fasta.dat,
-                taxonomy=splitby.taxonomy.dat,
-                #if $splitby.nameOrCount.is_of_type("mothur.names"):
-                    name=splitby.nameOrCount.dat,
-                #elif $splitby.nameOrCount.is_of_type("mothur.count_table"):
-                    count=splitby.nameOrCount.dat,
-                #end if
-                #if $splitby.taxlevel:
-                    taxlevel=$splitby.taxlevel,
-                #end if
-                classic=$splitby.classic,
             #end if
-            #if $method:
-                method=$method,
-            #end if
-            #if float($cutoff) > 0.0:
-                cutoff=$cutoff,
-            #end if
-            hard=$hard,
-            #if $precision
-                precision=$precision,
-            #end if
-            large=$large,
-            cluster=$cluster,
-            processors='\${GALAXY_SLOTS:-8}'
-        )'
-        | sed 's/ //g'  ## mothur trips over whitespace
-        | mothur
-        | tee mothur.out.log
+        #end if
+    #elif $splitby.splitmethod == "classify":
+        column=splitby.dist.dat,
+        taxonomy=splitby.taxonomy.dat,
+        #if $splitby.nameOrCount.is_of_type("mothur.names"):
+            name=splitby.nameOrCount.dat,
+        #elif $splitby.nameOrCount.is_of_type("mothur.count_table"):
+            count=splitby.nameOrCount.dat,
+        #end if
+        #if $splitby.taxlevel:
+            taxlevel=$splitby.taxlevel,
+        #end if
+    #elif $splitby.splitmethod == "fasta":
+        fasta=splitby.fasta.dat,
+        taxonomy=splitby.taxonomy.dat,
+        #if $splitby.nameOrCount.is_of_type("mothur.names"):
+            name=splitby.nameOrCount.dat,
+        #elif $splitby.nameOrCount.is_of_type("mothur.count_table"):
+            count=splitby.nameOrCount.dat,
+        #end if
+        #if $splitby.taxlevel:
+            taxlevel=$splitby.taxlevel,
+        #end if
+        classic=$splitby.classic,
+    #end if
+    method=$splitby.condmethod.method,
+    #if $splitby.condmethod.method == "opti":
+        metric=$splitby.condmethod.metric,
+        initialize=$splitby.condmethod.initialize,
+        delta=$splitby.condmethod.delta,
+        iters=$splitby.condmethod.iters,
+    #end if
+    #if float($cutoff) > 0.0:
+        cutoff=$cutoff,
+    #end if
+    #if $precision
+        precision=$precision,
+    #end if
+    large=$large,
+    cluster=$cluster,
+    runsensspec=$runsensspec,
+    processors='\${GALAXY_SLOTS:-8}'
+)'
+| sed 's/ //g'  ## mothur trips over whitespace
+| mothur
+| tee mothur.out.log
     ]]></command>
     <inputs>
         <conditional name="splitby">
-            <param name="splitmethod" type="select" label="Split by" help="">
+            <param name="splitmethod" type="select" label="Split by" help="VSEARCH methods (agc and dgc) require a fasta file">
                 <option value="distance">Distance</option>
                 <option value="classify">Classification</option>
                 <option value="fasta">Classification using fasta</option>
             </param>
             <when value="distance">
                 <conditional name="matrix">
-                    <param name="format" type="select" label="Select a Distance Matrix Format" help="">
+                    <param name="format" type="select" label="Select a Distance Matrix Format">
                         <option value="column">Pairwise Column Matrix</option>
                         <option value="phylip">Phylip Distance Matrix</option>
                     </param>
                     <when value="column">
-                        <param name="dist" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/>
+                        <param name="dist" argument="column" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/>
                         <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/>
                     </when>
                     <when value="phylip">
-                        <param name="dist" type="data" format="mothur.dist,mothur.lower.dist,mothur.square.dist" label="phylip - Distance Matrix"/>
+                        <param name="dist" argument="phylip" type="data" format="mothur.dist,mothur.lower.dist,mothur.square.dist" label="phylip - Distance Matrix"/>
                         <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" optional="true" label="name file or count table - Sequences Name reference"/>
                     </when>
                 </conditional>
+                <conditional name="condmethod">
+                    <expand macro="param-clustermethods"/>
+                    <when value="furthest"/>
+                    <when value="nearest"/>
+                    <when value="average"/>
+                    <when value="opti">
+                        <expand macro="params-opticlust"/>
+                    </when>
+                </conditional>
             </when>
             <when value="classify">
-                <param name="dist" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/>
-                <param name="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/>
+                <param name="dist" argument="column" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/>
+                <param argument="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/>
                 <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/>
-                <param name="taxlevel" type="integer" value="1" min="1" label="taxlevel - taxonomy level for split (default=1)" help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/>
+                <param argument="taxlevel" type="integer" value="1" min="1" label="taxlevel - taxonomy level for split (default=1)"
+                    help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/>
+                <conditional name="condmethod">
+                    <expand macro="param-clustermethods"/>
+                    <when value="furthest"/>
+                    <when value="nearest"/>
+                    <when value="average"/>
+                    <when value="opti">
+                        <expand macro="params-opticlust"/>
+                    </when>
+                </conditional>
             </when>
             <when value="fasta">
-                <param name="fasta" type="data" format="mothur.align,fasta" label="fasta - Aligned Sequences" help="must be aligned sequences (mothur.align)"/>
-                <param name="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/>
+                <param argument="fasta" type="data" format="mothur.align,fasta" label="Fasta"/>
+                <param argument="taxonomy" type="data" format="mothur.seq.taxonomy" label="Taxonomy" help="can be obtained by running classify.seqs"/>
                 <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/>
-                <param name="taxlevel" type="integer" value="3" min="1" label="taxlevel - taxonomy level for split (default=3)" help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/>
-                <param name="classic" type="boolean" checked="false" truevalue="true" falsevalue="false" label="classic - Use cluster.classic"/>
+                <param argument="taxlevel" type="integer" value="3" min="1" label="taxlevel - taxonomy level for split (default=3)"
+                    help="taxonomy level you want to use to split the distance file, default=3, meaning use the third taxon in each list"/>
+                <param argument="classic" type="boolean" checked="false" truevalue="true" falsevalue="false" label="classic - Use cluster.classic"/>
+                <conditional name="condmethod">
+                    <expand macro="param-clustermethods">
+                        <expand macro="option-vsearch-clustermethods"/>
+                    </expand>
+                    <when value="furthest"/>
+                    <when value="nearest"/>
+                    <when value="average"/>
+                    <when value="agc"/>
+                    <when value="dgc"/>
+                    <when value="opti">
+                        <expand macro="params-opticlust"/>
+                    </when>
+                </conditional>
             </when>
         </conditional>
-        <param name="method" type="select" label="method - Select a Clustering Method" help="">
-            <option value="furthest">Furthest neighbor</option>
-            <option value="nearest">Nearest neighbor</option>
-            <option value="average" selected="true">Average neighbor</option>
-        </param>
-        <param name="cutoff" type="float" value="0.0" min="0.0" label="cutoff - Distance Cutoff threshold - ignored if not > 0" help="Ignore pairwise distances larger than this, a common value would be 0.25"/>
-        <param name="hard" type="boolean" checked="true" truevalue="true" falsevalue="false" label="hard - Use hard cutoff instead of rounding" help=""/>
-        <param name="precision" type="select" optional="true" label="precision - Precision for rounding distance values" help="Set higher precision for longer genome scale sequence lengths">
+        <param argument="cutoff" type="float" value="0.0" min="0.0" label="cutoff - Distance Cutoff threshold - ignored if not > 0"
+            help="Ignore pairwise distances larger than this, a common value would be 0.25"/>
+        <param argument="precision" type="select" optional="true" label="precision - Precision for rounding distance values"
+            help="Set higher precision for longer genome scale sequence lengths">
             <option value="10">.1</option>
             <option value="100" selected="true">.01</option>
             <option value="1000">.001</option>
@@ -137,8 +172,12 @@
             <option value="100000">.00001</option>
             <option value="1000000">.000001</option>
         </param>
-        <param name="large" type="boolean" checked="false" truevalue="true" falsevalue="false" label="large - distance matrix is too large to fit in RAM" help="If your job fails due to not enough memory error, set this to true to rerun"/>
-        <param name="cluster" type="boolean" falsevalue="false" truevalue="true" checked="true" label="The cluster parameter allows you to indicate whether you want to run the clustering or just split the distance matrix, default=T"/>
+        <param argument="large" type="boolean" checked="false" truevalue="true" falsevalue="false" label="large - distance matrix is too large to fit in RAM"
+            help="If your job fails due to not enough memory error, set this to true to rerun"/>
+        <param argument="cluster" type="boolean" falsevalue="false" truevalue="true" checked="true" label="perform clustering?"
+            help="indicate whether you want to run the clustering or just split the distance matrix"/>
+        <param argument="runsensspec" type="boolean" truevalue="true" falsevalue="false" checked="true" label="runsensspec" help="run the sens.spec command on the completed list file"/>
+        <expand macro="param-savelog"/>
     </inputs>
     <outputs>
         <expand macro="logfile-output"/>
@@ -162,15 +201,36 @@
         <data name="splitfile" format="txt" from_work_dir="splitby.*.file" label="${tool.name} on ${on_string}: split.file">
             <filter>not cluster</filter>
         </data>
+        <data name="sensspec" format="txt" from_work_dir="splitby.*.sensspec" label="${tool.name} on ${on_string}: sensspec">
+            <filter>cluster and runsensspec and splitby['condmethod']['method'] == "opti"</filter><!-- sens.spec runs on the completed list file, so there is nothing to collect in split-only mode -->
+        </data>
     </outputs>
     <tests>
         <test><!-- test with distance method -->
             <param name="splitmethod" value="distance"/>
             <param name="format" value="phylip"/>
             <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/>
-            <output name="otulist" md5="2613ef0a1805ba9de012a41e938d8947" ftype="mothur.list"/>
-            <output name="rabund" md5="4df813ec2d51c373a846a82380c7a1f8" ftype="mothur.rabund"/>
-            <output name="sabund" md5="8d6813a5e8d2ad426a0ee5fdd99f1a19" ftype="mothur.sabund"/>
+            <param name="method" value="average"/>
+            <output name="otulist" ftype="mothur.list">
+                <assert_contents>
+                    <expand macro="test-list-format"/>
+                    <has_text text="unique"/>
+                    <has_text text="U68680"/>
+                </assert_contents>
+            </output>
+            <output name="rabund" ftype="mothur.rabund">
+                <assert_contents>
+                    <has_line_matching expression="^unique(\t\d+)+$"/>
+                    <has_text text="0.03"/>
+                </assert_contents>
+            </output>
+            <output name="sabund" ftype="mothur.sabund">
+                <assert_contents>
+                    <has_line_matching expression="^unique(\t\d+)+$"/>
+                    <has_text text="0.03"/>
+                </assert_contents>
+            </output>
+            <param name="savelog" value="true"/>
             <expand macro="logfile-test"/>
         </test>
         <test><!-- test with cluster false -->
@@ -178,6 +238,7 @@
             <param name="format" value="phylip"/>
             <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/>
             <param name="cluster" value="false"/>
+            <param name="method" value="average"/>
             <output name="splitfile" ftype="txt">
                 <assert_contents>
                     <has_text text="column"/>
@@ -186,12 +247,22 @@
                     <has_text text="temp"/>
                 </assert_contents>
             </output>
-            <output_collection name="splitnames" count="4">
-                <element name="0" md5="27037eeb3e696888b24653d0996261cd" ftype="mothur.names"/>
+            <output_collection name="splitnames" count="14">
+                <element name="0" ftype="mothur.names">
+                    <assert_contents>
+                        <has_text text="U68591"/>
+                        <has_text text="U68600"/>
+                    </assert_contents>
+                </element>
             </output_collection>
-            <output_collection name="splitdist" count="3">
-                <element name="4" md5="f751aee00b598d3b6691d34f67dbc8d5" ftype="mothur.dist"/>
+            <output_collection name="splitdist" count="13">
+                <element name="4" ftype="mothur.dist">
+                    <assert_contents>
+                        <has_line_matching expression="^U\d+\tU\d+\t\d+\.\d+$"/>
+                    </assert_contents>
+                </element>
             </output_collection>
+            <param name="savelog" value="true"/>
             <expand macro="logfile-test"/>
         </test>
         <test><!-- test with classify method (mothur.names input file) -->
@@ -200,9 +271,11 @@
             <param name="dist" value="amazon.pair.dist" ftype="mothur.pair.dist"/>
             <param name="nameOrCount" value="amazon.names" ftype="mothur.names"/>
             <param name="taxonomy" value="amazon.wang.wang.taxonomy" ftype="mothur.seq.taxonomy"/>
+            <param name="method" value="average"/>
             <output name="otulist" md5="d6eba624ad79759c530b9bc3285a1361" ftype="mothur.list"/>
             <output name="rabund" md5="2a165e1e40644fccb8cc9f53d8915bc3" ftype="mothur.rabund"/>
             <output name="sabund" md5="7aad8a9ca0eade414d6eba1f8bef960f" ftype="mothur.sabund"/>
+            <param name="savelog" value="true"/>
             <expand macro="logfile-test"/>
         </test>
         <test><!-- test with classify method (mothur.count_table input file) -->
@@ -211,7 +284,9 @@
             <param name="dist" value="amazon.pair.dist" ftype="mothur.pair.dist"/>
             <param name="nameOrCount" value="amazon.count_table" ftype="mothur.count_table"/>
             <param name="taxonomy" value="amazon.wang.wang.taxonomy" ftype="mothur.seq.taxonomy"/>
+            <param name="method" value="average"/>
             <output name="otulist" md5="c5c28330434d3e773221f635d04d6af9" ftype="mothur.list"/>
+            <param name="savelog" value="true"/>
             <expand macro="logfile-test"/>
         </test>
         <test><!-- test with fasta -->
@@ -220,14 +295,59 @@
             <param name="nameOrCount" value="amazon.align_head.names" ftype="mothur.names"/>
             <param name="taxonomy" value="amazon.align_head.wang.taxonomy" ftype="mothur.seq.taxonomy"/>
             <param name="cutoff" value="9999"/>
+            <param name="method" value="average"/>
             <output name="otulist" md5="a1279248cf2bc1094e0046b2cff1b785" ftype="mothur.list"/>
             <output name="rabund" md5="65ec9f326cd92fc607679b9902ec8430" ftype="mothur.rabund"/>
             <output name="sabund" md5="854d3acd15f64299c5d9d9e18f2d51b4" ftype="mothur.sabund"/>
+            <param name="savelog" value="true"/>
+            <expand macro="logfile-test"/>
+        </test>
+        <test><!-- test with vsearch executable (agc method; dgc is not exercised here) -->
+            <param name="splitmethod" value="fasta"/>
+            <param name="fasta" value="amazon.align_head" ftype="mothur.align"/>
+            <param name="nameOrCount" value="amazon.align_head.names" ftype="mothur.names"/>
+            <param name="taxonomy" value="amazon.align_head.wang.taxonomy" ftype="mothur.seq.taxonomy"/>
+            <param name="method" value="agc"/>
+            <output name="otulist" md5="0fbc5bf21331538dd50b6586c4005edc" ftype="mothur.list"/>
+            <output name="rabund" md5="dcccca11d9fa7a186cd93e9d4592f832" ftype="mothur.rabund"/>
+            <output name="sabund" md5="167815924b1b2b4d4e5e7468d41256cb" ftype="mothur.sabund"/>
+            <param name="savelog" value="true"/>
+            <expand macro="logfile-test"/>
+        </test>
+        <test><!-- test with opticlust method -->
+            <param name="splitmethod" value="distance"/>
+            <param name="format" value="phylip"/>
+            <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/>
+            <param name="method" value="opti"/>
+            <output name="otulist" ftype="mothur.list">
+                <assert_contents>
+                    <expand macro="test-list-format"/>
+                    <has_text text="0.03"/>
+                </assert_contents>
+            </output>
+            <output name="rabund" ftype="mothur.rabund">
+                <assert_contents>
+                    <expand macro="test-rabund-format"/>
+                    <has_text text="0.03"/>
+                </assert_contents>
+            </output>
+            <output name="sabund" ftype="mothur.sabund">
+                <assert_contents>
+                    <expand macro="test-sabund-format"/>
+                    <has_text text="0.03"/>
+                </assert_contents>
+            </output>
+            <output name="sensspec" ftype="txt">
+                <assert_contents>
+                    <expand macro="test-sensspec-format"/>
+                    <has_text text="0.03"/>
+                </assert_contents>
+            </output>
+            <param name="savelog" value="true"/>
             <expand macro="logfile-test"/>
         </test>
     </tests>
-    <help>
-<![CDATA[
+    <help><![CDATA[
 
 @MOTHUR_OVERVIEW@
 
@@ -240,7 +360,6 @@
 
 v1.28.0: Upgraded to Mothur 1.33, introduced cluster boolean.
 
-]]>
-    </help>
+    ]]></help>
     <expand macro="citations"/>
 </tool>
author	iuc
date	Tue, 20 Mar 2018 22:16:50 -0400
parents	e70a33ec8f3b
children	2c02989afecb