Mercurial > repos > iuc > dada2_makesequencetable
diff dada2_makeSequenceTable.xml @ 0:6e0946238688 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author | iuc |
---|---|
date | Fri, 08 Nov 2019 18:49:57 -0500 |
parents | |
children | 9ccec6ed8e82 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dada2_makeSequenceTable.xml Fri Nov 08 18:49:57 2019 -0500 @@ -0,0 +1,113 @@ +<tool id="dada2_makeSequenceTable" name="dada2: makeSequenceTable" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09"> + <description>construct a sequence table (analogous to OTU table)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="stdio"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ + Rscript '$dada2_script' + ]]></command> + <configfiles> + <configfile name="dada2_script"><![CDATA[ +@READ_FOO@ +@WRITE_FOO@ + +library(dada2, quietly=T) +#if $plot == "yes" +library(ggplot2, quietly=T) +#end if + +samples <- list() +#for $s in $samples: + samples[["$s.element_identifier"]] <- readRDS('$s') +#end for + +seqtab <- makeSequenceTable(samples, orderBy = "$orderBy") + +reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) +df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen) + +#if $plot == "yes" +pdf( '$plot_output' ) +ggplot(data=df, aes(x=length, y=count)) + + geom_col() + +#if $filter_cond.filter_select != "no" + geom_vline( xintercept=c($filter_cond.min-0.5, $filter_cond.max+0.5) ) + +#end if + theme_bw() +bequiet <- dev.off() +#end if + +## filter by seqlengths +#if $filter_cond.filter_select != "no" + seqtab <- seqtab[, nchar(colnames(seqtab)) %in% seq($filter_cond.min, $filter_cond.max), drop=F] +#end if +write.data( seqtab, '$stable', "dada2_sequencetable" ) + ]]></configfile> + </configfiles> + <inputs> + <param argument="samples" type="data" multiple="true" format="@DADA_UNIQUES@" label="samples" /> + <param argument="orderBy" type="select" label="Column order"> + <option value="abundance">abundance</option> + <option value="nsamples">nsamples</option> + </param> + <conditional name="filter_cond"> + <param name="filter_select" type="select" label="Length filter method"> + <option value="no">No filter</option> + <option value="minmax">Specify minimum and maximum sequence lengths</option> + </param> + <when value="no"/> + <when value="minmax"> + <param name="min" type="integer" value="" label="Minimum sequence length"/> + <param name="max" type="integer" value="" label="Maximum sequence length"/> + </when> + </conditional> + <param name="plot" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="plot sequence length distribution" /> + </inputs> + <outputs> + <data name="stable" format="dada2_sequencetable" label="${tool.name} on ${on_string}"/> + <data name="plot_output" format="pdf" label="${tool.name} on ${on_string}: sequence length distribution"> + <filter>plot</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="2"> + <param name="samples" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/> + <output name="stable" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable" lines_diff="2"/> + <output name="plot_output" value="makeSequenceTable_F3D0.pdf" ftype="pdf" /> + </test> + <test expect_num_outputs="1"> + <param name="samples" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/> + <param name="filter_cond|filter_select" value="minmax"/> + <param name="filter_cond|min" value="200"/> + <param name="filter_cond|max" value="300"/> + <param name="plot" value="no" /> + <output name="stable" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable" lines_diff="2" /> + </test> + </tests> + <help><![CDATA[ +Description +........... + +This function constructs a sequence table -- more precisely an amplicon sequence variant table (ASV) table -- a higher-resolution version of the OTU table produced by traditional methods. + +The sequence table is a matrix with rows corresponding to (and named by) the samples, and columns corresponding to (and named by) the sequence variants. + +Usage +..... + +**Input**: The result of dada, or mergePairs. + +**Output**: A data set of type dada2_sequencetable, i.e. a tabular with a row for each sample, and a column for each unique sequence across all the samples. The columns are named by the sequence. + +Details +....... + +Sequences that are much longer or shorter than expected may be the result of non-specific priming. You can remove non-target-length by applying a length filter. This is analogous to “cutting a band” in-silico to get amplicons of the targeted length. + +@HELP_OVERVIEW@ + ]]></help> + <expand macro="citations"/> +</tool>