diff dada2_seqCounts.xml @ 0:d26cea4b4cc4 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author iuc
date Fri, 08 Nov 2019 18:50:51 -0500
parents
children f74c56549143
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dada2_seqCounts.xml	Fri Nov 08 18:50:51 2019 -0500
@@ -0,0 +1,156 @@
+<tool id="dada2_seqCounts" name="dada2: sequence counts" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+    Rscript '$dada2_script'
+    ]]></command>
+    <configfiles>
+        <configfile name="dada2_script"><![CDATA[
+@READ_FOO@
+## Rendered by Galaxy (Cheetah) into an R script: for every repeat block one
+## count column (or, for tabular statistics, one group of columns) is computed
+## and all of them are merged by sample name into one tab separated table.
+## Lines starting with a hash and a directive are Cheetah control flow that is
+## evaluated while the script is rendered; everything else is R code.
+## NOTE(review): the read_data helper used below is not defined in this file;
+## presumably it is provided by the macro token on the first line - confirm
+## in macros.xml.
+library(dada2, quietly=TRUE)
+
+## Write numbers in fixed notation: with the default scipen a double such as
+## 100000 (e.g. a result of rowSums) would be written as 1e+05 by write.table.
+options(scipen=999)
+
+## Count for a single object: sum of the values returned by dada2's getUniques.
+getN <- function(x){ sum(getUniques(x)) }
+
+## df accumulates the merged result over all repeat blocks
+df <- NULL
+#for $i, $rep in enumerate($inrep)
+    samples <- list()
+    #for $s in $rep.input:
+        ## for collection input assume identifiers are sample names
+        #if $s.ext in ["tabular", "dada2_dada", "dada2_mergepairs"]
+            sample_name <- '$s.element_identifier'
+            samples[[sample_name]] <- $read_data( $s )
+        #else
+            ## any other datatype is loaded as a whole and replaces the list
+            samples <- $read_data( $s )
+        #end if
+        ## the flag reflects the datatype of the last dataset of the block
+        #if $s.ext == "tabular"
+            tabular <- TRUE
+        #else
+            tabular <- FALSE
+        #end if
+    #end for
+
+    ## column label: the user supplied name, else the index of the repeat block
+    #if str($rep.name) == ""
+        dname <- '$str(i)'
+    #else
+        dname <- '$rep.name'
+    #end if
+
+    if( tabular ){
+        ## stack the per sample tables row wise, prefix their column names
+        ## with the label and prepend the sample names as first column
+        tdf <- NULL
+        for( n in names( samples ) ){
+            if(is.null(tdf)){
+                tdf <- samples[[n]]
+            }else{
+                tdf <- rbind(tdf, samples[[n]])
+            }
+        }
+        names(tdf) <- paste( dname, names(tdf) )
+        tdf <- cbind( data.frame(samples=names( samples )), tdf)
+    }else{
+        ## without names the sample identifiers are taken from the row names
+        if(is.null(names(samples))){
+            tdf <- data.frame( samples = row.names(samples) )
+        }else{
+            tdf <- data.frame( samples = names(samples) )
+        }
+        ## try the per element count first; if that raises an error fall
+        ## back to the row sums
+        seq_counts <- tryCatch({
+            sapply(samples, getN)
+        },
+        error=function(cond) {
+            rowSums(samples)
+        })
+        tdf[[ dname ]] <- seq_counts
+    }
+    ## full outer join on the sample names, so samples missing from one of
+    ## the inputs are kept
+    if(is.null(df)){
+        df <- tdf
+    }else{
+        df <- merge( df, tdf, by="samples", all=TRUE, no.dups=TRUE)
+    }
+#end for
+write.table(df, "$counts", quote=FALSE, sep="\t", row.names = FALSE, col.names = TRUE)
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <repeat name="inrep" title="data sets" min="1">
+            <param name="input" type="data" multiple="true" format="tabular,@DADA_UNIQUES@,dada2_sequencetable,dada2_uniques" label="Dataset(s)"/>
+            <param name="name" type="text" value="" optional="true" label="name"/>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data name="counts" format="tabular" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <!-- tests for the separate inputs (not implemented as a single test using the repeat,
+             since the sample names would not match anyway: Galaxy does not allow specifying the
+             names of the elements of the input collection for <param ... multiple="true">) -->
+        <test>
+            <repeat name="inrep">
+                <param name="input" value="filterAndTrim_F3D0.tab" ftype="tabular"/>
+                <param name="name" value="filter"/>
+            </repeat>
+            <output name="counts" value="seqCounts_F3D0_filter.tab" ftype="tabular" />
+        </test>
+        <test>
+            <repeat name="inrep">
+                <param name="input" value="dada_F3D0_R1.Rdata" ftype="dada2_dada"/>
+                <param name="name" value="dadaF"/>
+            </repeat>
+            <output name="counts" value="seqCounts_F3D0_dadaF.tab" ftype="tabular" />
+        </test>
+        <test>
+            <repeat name="inrep">
+                <param name="input" value="mergePairs_F3D0.Rdata" ftype="dada2_mergepairs"/>
+                <param name="name" value="merge"/>
+            </repeat>
+            <output name="counts" value="seqCounts_F3D0_merge.tab" ftype="tabular" />
+        </test>
+        <test>
+            <repeat name="inrep">
+                <param name="input" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable"/>
+                <param name="name" value="seqtab"/>
+            </repeat>
+            <output name="counts" value="seqCounts_F3D0_seqtab.tab" ftype="tabular" />
+        </test>
+        <test>
+            <repeat name="inrep">
+                <param name="input" value="removeBimeraDenovo_F3D0.tab" ftype="dada2_sequencetable"/>
+                <param name="name" value="nochim"/>
+            </repeat>
+            <output name="counts" value="seqCounts_F3D0_nochim.tab" ftype="tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+Description
+...........
+
+Get the counts of sequences per sample for the different stages of the dada pipeline.
+
+Usage
+.....
+
+**Inputs:**
+
+Any number of results of dada2 steps in the following form:
+
+- a collection of results from dada or mergePairs, or the collection of statistics from filterAndTrim (the identifiers of the collection elements are used as sample names)
+- the result of makeSequenceTable or removeBimeraDenovo
+
+**Output:**
+
+A table containing the number of sequences per sample (rows) for each input (columns)
+
+Details
+.......
+
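+How the counts are obtained depends on the kind of input:
+
+- filterAndTrim: the values of the statistics tables are used as they are
+- dada and mergePairs: the sum of the result of dada2's getUniques function is used
+- makeSequenceTable and removeBimeraDenovo: R's rowSums function is used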
+
+@HELP_OVERVIEW@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>