diff dada2_plotComplexity.xml @ 0:ab2030f217a9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author iuc
date Fri, 08 Nov 2019 18:51:48 -0500
parents
children 1728e5ee871a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dada2_plotComplexity.xml	Fri Nov 08 18:51:48 2019 -0500
@@ -0,0 +1,194 @@
+<tool id="dada2_plotComplexity" name="dada2: plotComplexity" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09">
+    <description>Plot sequence complexity profile</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+##name files by linking
+#import re
+mkdir forward &&
+#if $batch_cond.paired_cond.paired_select != "single"
+    mkdir reverse &&
+#end if
+
+#if $batch_cond.batch_select == "batch":
+    #set elid = re.sub('[^\w\-\.]', '_', str($batch_cond.paired_cond.reads.element_identifier))
+    #if $batch_cond.paired_cond.paired_select != "paired"
+        ln -s '$batch_cond.paired_cond.reads' forward/'$elid' &&
+    #else
+        ln -s '$batch_cond.paired_cond.reads.forward' forward/'$elid' &&
+        ln -s '$batch_cond.paired_cond.reads.reverse' reverse/'$elid' &&
+    #end if
+    #if $batch_cond.paired_cond.paired_select == "separate"
+        ln -s '$batch_cond.paired_cond.sdaer' reverse/'$elid' &&
+    #end if
+#else
+    #for $read in $batch_cond.paired_cond.reads:
+        #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier))
+        #if $batch_cond.paired_cond.paired_select != "paired"
+            ln -s '$read' forward/'$elid' &&
+        #else
+            ln -s '$read.forward' forward/'$elid' &&
+            ln -s '$read.reverse' reverse/'$elid' &&
+        #end if
+    #end for
+    #if $batch_cond.paired_cond.paired_select == "separate"
+        #for $read in $batch_cond.paired_cond.sdaer:
+            #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier))
+            ln -s '$read' reverse/'$elid' &&
+        #end for
+    #end if
+#end if
+
+    Rscript --slave '$dada2_script'
+    ]]></command>
+    <configfiles>
+        <configfile name="dada2_script"><![CDATA[
+#import re
+library(ggplot2, quietly=T)
+library(dada2, quietly=T)
+
+#if $batch_cond.batch_select != "batch"
+agg <- $batch_cond.aggregate
+#else
+agg <- FALSE
+#end if
+
+#if str($window) == ""
+    wndw <- NULL
+#else
+    wndw <- $window
+#end if
+
+fwd_files <- list.files("forward", full.names=T)
+qp <- plotComplexity(fwd_files, kmerSize=$kmerSize, window=wndw, by=$by, n=$n, bins=$bins, aggregate = agg)
+ggsave('output.pdf', qp, width = 20,height = 15,units = c("cm"))
+
+#if $batch_cond.paired_cond.paired_select != "single"
+rev_files <- list.files("reverse", full.names=T)
+qp <- plotComplexity(rev_files, kmerSize=$kmerSize, window=wndw, by=$by, n=$n, bins=$bins, aggregate = agg)
+ggsave('output_rev.pdf', qp, width = 20,height = 15,units = c("cm"))
+#end if
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="batch_cond">
+            <param name="batch_select" type="select" label="Processing mode" help="Joint processing processes all reads at once in a single job creating a single output (two in the case of paired data). Batch processes the samples in separate jobs and creates separate output for each">
+                <option value="joint">Joint</option>
+                <option value="batch">Batch</option>
+            </param>
+            <when value="joint">
+                <expand macro="fastq_input" multiple="True" collection_type="list:paired" argument_fwd="fl" argument_rev="fl"/>
+                <param argument="aggregate" type="boolean" label="Aggregate data" checked="True" truevalue="TRUE" falsevalue="FALSE" help="Create a single plot for all data sets (default) or a separate plot for each data set"/>
+            </when>
+            <when value="batch">
+                <expand macro="fastq_input" multiple="False" collection_type="paired" argument_fwd="fl" argument_rev="fl"/>
+            </when>
+        </conditional>
+        <param argument="kmerSize" type="integer" value="2" label="kmer size" help="kmer: also known as oligonucleotides words"/>
+        <param argument="window" type="integer" value="" optional="true" label="width (nucleotides) of the moving window" help="If not specified (default) the whole sequence is used"/>
+        <param argument="by" type="integer" value="5" label="step size (nucleotides)" help="between each moving window tested"/>
+        <param argument="n" type="integer" value="100000" label="sample number" help="number of records to sample from the fastq file"/>
+        <param argument="bins" type="integer" value="100" label="number of bins to use for the histogram" help=""/>
+    </inputs>
+    <outputs>
+        <data name="output" format="pdf" from_work_dir="output.pdf">
+            <filter>batch_cond['paired_cond']['paired_select'] == "single"</filter>
+        </data>
+        <data name="output_fwd" format="pdf" from_work_dir="output.pdf" label="${tool.name} on ${on_string}: forward reads">
+            <filter>batch_cond['paired_cond']['paired_select'] != "single"</filter>
+        </data>
+        <data name="output_rev" format="pdf" from_work_dir="output_rev.pdf" label="${tool.name} on ${on_string}: reverse reads">
+            <filter>batch_cond['paired_cond']['paired_select'] != "single"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- all tests are against the same file using a delta that should ensure that the pdf contains a plot -->
+        <!-- paired joint, no-aggregate -->
+        <test expect_num_outputs="2">
+            <param name="batch_cond|batch_select" value="joint"/>
+            <param name="batch_cond|paired_cond|paired_select" value="paired"/>
+            <param name="batch_cond|paired_cond|reads">
+                <collection type="list:paired">
+                    <element name="F3D0_S188_L001">
+                        <collection type="paired">
+                            <element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+                            <element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
+                         </collection>
+                    </element>
+                </collection>
+            </param>
+            <param name="batch_cond|aggregate" value="FALSE"/>
+            <output name="output_fwd" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+            <output name="output_rev" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+        </test>
+        <!-- paired-separate joint, no-aggregate (sim_size because element ids differ) -->
+        <test expect_num_outputs="2">
+            <param name="batch_cond|batch_select" value="joint"/>
+            <param name="batch_cond|paired_cond|paired_select" value="separate"/>
+            <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="batch_cond|paired_cond|sdaer" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="batch_cond|aggregate" value="FALSE"/>
+            <output name="output_fwd" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+            <output name="output_rev" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+        </test>
+        <!-- single, non-batch, aggregate, small sample -->
+        <test expect_num_outputs="1">
+            <param name="batch_cond|batch_select" value="joint"/>
+            <param name="batch_cond|paired_cond|paired_select" value="single"/>
+            <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz,F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="n" value="10000"/>
+            <param name="batch_cond|aggregate" value="TRUE"/>
+            <output name="output" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+        </test>
+
+        <!-- paired, batch -->
+        <test expect_num_outputs="2">
+            <param name="batch_cond|batch_select" value="batch"/>
+            <param name="batch_cond|paired_cond|paired_select" value="paired"/>
+            <param name="batch_cond|paired_cond|reads">
+                <collection type="paired">
+                    <element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+                    <element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
+                </collection>
+            </param>
+            <output name="output_fwd" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+            <output name="output_rev" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+        </test>
+        <!-- paired-separate batch  (sim_size because element ids differ)-->
+        <test expect_num_outputs="2">
+            <param name="batch_cond|batch_select" value="batch"/>
+            <param name="batch_cond|paired_cond|paired_select" value="separate"/>
+            <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="batch_cond|paired_cond|sdaer" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
+            <output name="output_fwd" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+            <output name="output_rev" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+        </test>
+        <!-- single, batch -->
+        <test expect_num_outputs="1">
+            <param name="batch_cond|batch_select" value="batch"/>
+            <param name="batch_cond|paired_cond|paired_select" value="single"/>
+            <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="n" value="10000"/>
+            <output name="output" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Summary
+.......
+
+This function plots a histogram of the distribution of sequence complexities in the form of effective numbers of kmers as determined by seqComplexity. By default, kmers of size 2 are used, in which case a perfectly random sequences will approach an effective kmer number of 16 = 4 (nucleotides)^ 2 (kmer size).
+
+Details
+.......
+
+This function calculates the kmer complexity of input sequences.  Complexity is quantified as the Shannon richness of kmers, which can be thought of as the effective number of kmers if they were all at equal frequencies.  If a window size is provided, the minimum Shannon richness observed over sliding window along the sequence is returned.
+
+
+@HELP_OVERVIEW@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>