diff dada2_makeSequenceTable.xml @ 0:6e0946238688 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author iuc
date Fri, 08 Nov 2019 18:49:57 -0500
parents
children 9ccec6ed8e82
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dada2_makeSequenceTable.xml	Fri Nov 08 18:49:57 2019 -0500
@@ -0,0 +1,113 @@
+<tool id="dada2_makeSequenceTable" name="dada2: makeSequenceTable" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09">
+    <description>construct a sequence table (analogous to OTU table)</description>
+    <!-- macros.xml is the only import here, so the macros expanded in this file
+         (requirements, stdio, version_command, citations) and the @...@ tokens
+         are expected to be defined there. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <!-- The job only runs Rscript on the script generated from the dada2_script
+         configfile below; all tool parameters are rendered into that script. -->
+    <command detect_errors="exit_code"><![CDATA[
+    Rscript '$dada2_script'
+    ]]></command>
+    <configfiles>
+        <configfile name="dada2_script"><![CDATA[
+## R script (Cheetah template): merges the per-sample inputs into one sequence
+## table, optionally plots the sequence length distribution and optionally
+## filters the table by sequence length.
+## The two macro tokens below are expanded from the imported macros; write.data,
+## used at the end, is not defined in this script and presumably comes from them.
+@READ_FOO@
+@WRITE_FOO@
+
+library(dada2, quietly = TRUE)
+#if $plot == "yes"
+library(ggplot2, quietly = TRUE)
+#end if
+
+## Named list of per-sample objects; the names become the row names of the table.
+## Backslashes and double quotes in the dataset name are escaped so that the name
+## stays a valid R string literal and cannot terminate it early.
+samples <- list()
+#for $s in $samples:
+    #set $sample_id = str($s.element_identifier).replace('\\', '\\\\').replace('"', '\\"')
+    samples[["$sample_id"]] <- readRDS('$s')
+#end for
+
+seqtab <- makeSequenceTable(samples, orderBy = "$orderBy")
+
+#if $plot == "yes"
+## Total read count per sequence length, taken from the unfiltered table.
+## Only needed for the plot, so it is computed only when a plot is requested.
+reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum)
+length_counts <- data.frame(length = as.numeric(names(reads.per.seqlen)), count = reads.per.seqlen)
+
+## With a length filter, vertical lines are drawn half a base outside the
+## retained range. The plot is printed explicitly so that rendering does not
+## depend on top-level auto-printing.
+pdf( '$plot_output' )
+length_plot <- ggplot(data = length_counts, aes(x = length, y = count)) +
+    geom_col() +
+#if $filter_cond.filter_select != "no"
+    geom_vline( xintercept = c($filter_cond.min-0.5, $filter_cond.max+0.5) ) +
+#end if
+    theme_bw()
+print(length_plot)
+bequiet <- dev.off()
+#end if
+
+## filter by seqlengths (bounds are inclusive; drop = FALSE keeps a matrix even
+## when a single column remains)
+#if $filter_cond.filter_select != "no"
+    seqtab <- seqtab[, nchar(colnames(seqtab)) %in% seq($filter_cond.min, $filter_cond.max), drop = FALSE]
+#end if
+write.data( seqtab, '$stable', "dada2_sequencetable" )
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <!-- One or more per-sample result datasets. The script loads each one with
+             readRDS and stores it under its element identifier. -->
+        <param argument="samples" type="data" multiple="true" format="@DADA_UNIQUES@" label="samples" />
+        <!-- Passed unchanged as the orderBy argument of makeSequenceTable. -->
+        <param argument="orderBy" type="select" label="Column order">
+            <option value="abundance">abundance</option>
+            <option value="nsamples">nsamples</option>
+        </param>
+        <!-- Optional length filter. With "minmax" the script keeps only sequences
+             whose length is in seq(min, max), i.e. both bounds are inclusive.
+             The empty default values leave both fields blank in the form. -->
+        <conditional name="filter_cond">
+            <param name="filter_select" type="select" label="Length filter method">
+                <option value="no">No filter</option>
+                <option value="minmax">Specify minimum and maximum sequence lengths</option>
+            </param>
+            <when value="no"/>
+            <when value="minmax">
+                <param name="min" type="integer" value="" label="Minimum sequence length"/>
+                <param name="max" type="integer" value="" label="Maximum sequence length"/>
+            </when>
+        </conditional>
+        <!-- Rendered as "yes" or "no" in the script; "yes" loads ggplot2 and writes
+             the PDF with the sequence length distribution. -->
+        <param name="plot" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="plot sequence length distribution" />
+    </inputs>
+    <outputs>
+        <!-- Sequence table written by the write.data call at the end of the script. -->
+        <data name="stable" format="dada2_sequencetable" label="${tool.name} on ${on_string}"/>
+        <!-- PDF with the read count per sequence length; the filter ties this
+             output to the plot input parameter. -->
+        <data name="plot_output" format="pdf" label="${tool.name} on ${on_string}: sequence length distribution">
+            <filter>plot</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Only the input is set, so the other parameters keep their defaults;
+             the table and the plot are both expected. -->
+        <test expect_num_outputs="2">
+            <param name="samples" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/>
+            <output name="stable" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable" lines_diff="2"/>
+            <output name="plot_output" value="makeSequenceTable_F3D0.pdf" ftype="pdf" />
+        </test>
+        <!-- Length filter 200 to 300 with plotting disabled. The result is compared
+             against the same expected table as the unfiltered run above. -->
+        <test expect_num_outputs="1">
+            <param name="samples" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/>
+            <param name="filter_cond|filter_select" value="minmax"/>
+            <param name="filter_cond|min" value="200"/>
+            <param name="filter_cond|max" value="300"/>
+            <param name="plot" value="no" />
+            <output name="stable" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable" lines_diff="2" />
+        </test>
+    </tests>
+    <help><![CDATA[
+Description
+...........
+
+This function constructs a sequence table -- more precisely an amplicon sequence variant (ASV) table -- a higher-resolution version of the OTU table produced by traditional methods.
+
+The sequence table is a matrix with rows corresponding to (and named by) the samples, and columns corresponding to (and named by) the sequence variants.
+
+Usage
+.....
+
+**Input**: The result of dada, or mergePairs.
+
+**Output**: A data set of type dada2_sequencetable, i.e. a tabular file with a row for each sample and a column for each unique sequence across all the samples. The columns are named by the sequence.
+
+Details
+.......
+
+Sequences that are much longer or shorter than expected may be the result of non-specific priming. You can remove sequences of non-target length by applying a length filter. This is analogous to “cutting a band” in silico to get amplicons of the targeted length.
+
+@HELP_OVERVIEW@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>