changeset 8:bec1f08cdfcc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit dc23703c260d004a28fe24a2a7c00cb4371bc32e
author iuc
date Thu, 27 Apr 2017 04:19:34 -0400
parents f1f715f5d2f3
children 57910d476be9
files macros.xml stacks_procrad.xml test-data/demultiplexed/PopA_01.1.fq.gzip test-data/denovo_map/popmap_cstacks.tsv test-data/procrad/R1.fq.gzip test-data/ustacks/ustacks.out
diffstat 6 files changed, 140 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Fri Apr 07 11:49:00 2017 -0400
+++ b/macros.xml	Thu Apr 27 04:19:34 2017 -0400
@@ -2,14 +2,14 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="1.42">stacks</requirement>
+            <requirement type="package" version="1.46">stacks</requirement>
             <requirement type="package" version="1.2.10">velvet</requirement>
-            <container type="docker">quay.io/biocontainers/stacks:1.42--2</container>
+            <requirement type="package" version="1.1">stacks_summary</requirement>
             <yield/>
         </requirements>
     </xml>
 
-    <token name="@WRAPPER_VERSION@">1.42</token>
+    <token name="@WRAPPER_VERSION@">1.46</token>
 
     <xml name="stdio">
         <stdio>
@@ -90,6 +90,7 @@
         <option value="bsaHI">bsaHI</option>
         <option value="hpaII">hpaII</option>
         <option value="ncoI">ncoI</option>
+        <option value="ApaLI">ApaLI</option>
     </xml>
 
     <xml name="cross_types">
@@ -100,6 +101,19 @@
         <option value="GEN">GEN (generic, unspecific to any map type)</option>
     </xml>
 
+    <!-- clean_ext: strip trailing read/format/compression suffixes from a dataset identifier --> <token name="@CLEAN_EXT@">
+        <![CDATA[
+        #from os.path import splitext
+        #import re
+        #def clean_ext($identifier)
+            #while $identifier.endswith(('.1', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam'))
+                #set $identifier = $identifier.rsplit('.', 1)[0]
+            #end while
+$identifier#slurp
+        #end def
+        ]]>
+    </token>
+
     <token name="@NORM_GENOTYPES_OUTPUT_LIGHT@">
         <![CDATA[
         ## We need to do this as the output file names contains the value of an option (min progeny)
--- a/stacks_procrad.xml	Fri Apr 07 11:49:00 2017 -0400
+++ b/stacks_procrad.xml	Thu Apr 27 04:19:34 2017 -0400
@@ -7,29 +7,29 @@
     <expand macro="stdio"/>
     <command><![CDATA[
 
-        #if $input_type.options_type_selector == "single":
+        #if $input_type.options_type_selector == "single"
 
-            #if $input_type.input_single.is_of_type('fastqsanger'):
+            #if $input_type.input_single.is_of_type('fastqsanger')
                 #set $ext = ".fq"
                 #set inputype = "fastq"
-            #else:
+            #else
                 #set $ext = ".fq.gz"
                 #set inputype = "gzfastq"
             #end if
 
-            ln -s "$input_type.input_single" R1$ext &&
+            ln -s '$input_type.input_single' R1$ext &&
         #else
 
-            #if $input_type.inputs_paired1.is_of_type('fastqsanger'):
+            #if $input_type.inputs_paired1.is_of_type('fastqsanger')
                 #set $ext = ".fq"
                 #set inputype = "fastq"
-            #else:
+            #else
                 #set $ext = ".fq.gz"
                 #set inputype = "gzfastq"
             #end if
 
-            ln -s "$input_type.inputs_paired1" R1$ext &&
-            ln -s "$input_type.inputs_paired2" R2$ext &&
+            ln -s '$input_type.inputs_paired1' R1$ext &&
+            ln -s '$input_type.inputs_paired2' R2$ext &&
         #end if
 
         mkdir stacks_outputs
@@ -38,31 +38,33 @@
 
         process_radtags
 
-            #if $input_type.options_type_selector == "single":
+            #if $input_type.options_type_selector == "single"
                 -f R1$ext
-            #else:
+            #else
                 -1 R1$ext
                 -2 R2$ext
             #end if
 
             -i $inputype
-            -b "$barcode"
+            -b '$barcode'
 
             $input_type.barcode_encoding
 
-            #if str( $options_enzyme.options_enzyme_selector ) == "1":
+            #if str( $options_enzyme.options_enzyme_selector ) == "1"
                 -e $options_enzyme.enzyme
-            #else:
+            #else
                 --renz_1 $options_enzyme.enzyme --renz_2 $options_enzyme.enzyme2
             #end if
 
-            -y $outype
+            #if str( $outype ) != "auto"
+                -y $outype
+            #end if
 
             $capture
 
             $options_advanced.retain_header
 
-            #if str($options_advanced.truncate):
+            #if str($options_advanced.truncate)
                 -t $options_advanced.truncate
             #end if
 
@@ -85,7 +87,7 @@
                 <option value="paired">Paired-end files</option>
             </param>
             <when value="single">
-                <param name="input_single" argument="-f" format="fastqsanger,fastq.gz" type="data" label="singles-end reads infile(s)" help="input files" />
+                <param name="input_single" argument="-f" format="fastqsanger,fastqsanger.gz" type="data" label="singles-end reads infile(s)" help="input files" />
 
                 <param name="barcode_encoding" type="select" label="Barcode location">
                     <option value="--inline_null" selected="True">Barcode is inline with sequence</option>
@@ -93,8 +95,8 @@
                 </param>
             </when>
             <when value="paired">
-                <param name="inputs_paired1" argument="-1" format="fastqsanger,fastq.gz" type="data" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />
-                <param name="inputs_paired2" argument="-2" format="fastqsanger,fastq.gz" type="data" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />
+                <param name="inputs_paired1" argument="-1" format="fastqsanger,fastqsanger.gz" type="data" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />
+                <param name="inputs_paired2" argument="-2" format="fastqsanger,fastqsanger.gz" type="data" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />
 
                 <param name="barcode_encoding" type="select" label="Barcode location">
                     <option value="--inline_null" selected="True">Barcode is inline with sequence, only on the single-end read (read 1)</option>
@@ -141,10 +143,11 @@
             <param name="retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue="" argument="--retain_header" label="Retain unmodified FASTQ headers in the output" />
         </section>
 
-        <!-- Stacks can produce fastq.gz and fasta.gz output but we don't propose it as they are not very common datatypes in galaxy -->
-        <param name="outype" argument="-y" type="select" label="Output format" help="output type, either 'fastq' or 'fasta'" >
-            <option value="fastq" selected="True">fastq</option>
+        <param name="outype" argument="-y" type="select" label="Output format" >
+            <option value="auto" selected="True">Same as input</option>
+            <option value="fastq">fastq</option>
             <option value="fasta">fasta</option>
+            <option value="gzfastq">gzipped fastq</option>
         </param>
     </inputs>
 
@@ -153,16 +156,19 @@
 
         <collection name="demultiplexed" type="list" label="Demultiplexed reads from ${on_string}">
             <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" /> <!-- both dots escaped: only a literal .fq.gz suffix matches -->
             <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
         </collection>
         <collection name="remaining" type="list" label="Remaining orphan reads from ${on_string}">
             <filter>input_type['options_type_selector'] == "paired"</filter>
             <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" /> <!-- both dots escaped: only a literal .fq.gz suffix matches -->
             <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
         </collection>
         <collection name="discarded" type="list" label="${tool.name}: discarded reads from ${on_string}">
             <filter>capture is True</filter>
             <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.discards$" ext="fastqsanger" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.gz\.discards$" ext="fastqsanger" directory="stacks_outputs" /> <!-- discards are never gzipped, so files named *.fq.gz.discards are still collected as fastqsanger -->
             <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.discards$" ext="fasta" directory="stacks_outputs" />
         </collection>
     </outputs>
@@ -189,6 +195,23 @@
             </output_collection>
         </test>
         <test>
+            <param name="options_type_selector" value="single"/>
+            <param name="input_single" ftype="fastqsanger" value="procrad/R1.fq"/>
+            <param name="barcode" value="procrad/barcodes"/>
+            <param name="options_enzyme_selector" value="1"/>
+            <param name="enzyme" value="ecoRI"/>
+            <param name="discard" value="true"/>
+            <param name="capture" value="true"/>
+            <param name="outype" value="gzfastq"/>
+            <output name="output_log" file="procrad/process_radtags.out" compare="sim_size"/>
+            <output_collection name="demultiplexed">
+                <element name="PopA_01" ftype="fastqsanger.gz" md5="c7250f50138cbca747b85223aaae9565"/>
+            </output_collection>
+            <output_collection name="discarded">
+                <element name="R1" ftype="fastqsanger" md5="786b30d864332a2d56d9179f0a53add4"/>
+            </output_collection>
+        </test>
+        <test>
             <param name="options_type_selector" value="paired"/>
             <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
             <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
@@ -262,6 +285,43 @@
                 </element>
             </output_collection>
         </test>
+        <test>
+            <param name="options_type_selector" value="single"/>
+            <param name="input_single" ftype="fastqsanger" value="procrad/R1.fq.gzip"/>
+            <param name="barcode" value="procrad/barcodes"/>
+            <param name="options_enzyme_selector" value="1"/>
+            <param name="enzyme" value="ecoRI"/>
+            <param name="discard" value="true"/>
+            <param name="capture" value="true"/>
+            <output name="output_log" file="procrad/process_radtags.out" compare="sim_size"/>
+            <output_collection name="demultiplexed">
+                <element name="PopA_01" compare="sim_size" file="demultiplexed/PopA_01.1.fq"/>
+            </output_collection>
+            <output_collection name="discarded">
+                <element name="R1">
+                    <assert_contents>
+                        <has_text text="lane1_fakedata0_11" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="options_type_selector" value="single"/>
+            <param name="input_single" ftype="fastqsanger.gz" value="procrad/R1.fq.gzip"/>
+            <param name="barcode" value="procrad/barcodes"/>
+            <param name="options_enzyme_selector" value="1"/>
+            <param name="enzyme" value="ecoRI"/>
+            <param name="discard" value="true"/>
+            <param name="capture" value="true"/>
+            <param name="outype" value="gzfastq"/>
+            <output name="output_log" file="procrad/process_radtags.out" compare="sim_size"/>
+            <output_collection name="demultiplexed">
+                <element name="PopA_01" compare="sim_size" file="demultiplexed/PopA_01.1.fq.gzip"/>
+            </output_collection>
+            <output_collection name="discarded">
+                <element name="R1" ftype="fastqsanger" md5="786b30d864332a2d56d9179f0a53add4"/>
+            </output_collection>
+        </test>
     </tests>
 
 
Binary file test-data/demultiplexed/PopA_01.1.fq.gzip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/denovo_map/popmap_cstacks.tsv	Thu Apr 27 04:19:34 2017 -0400
@@ -0,0 +1,1 @@
+PopA_01	myPopA
Binary file test-data/procrad/R1.fq.gzip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ustacks/ustacks.out	Thu Apr 27 04:19:34 2017 -0400
@@ -0,0 +1,41 @@
+ustacks parameters selected:
+  Sample ID: 1
+  Min depth of coverage to create a stack: 2
+  Max distance allowed between stacks: 2
+  Max distance allowed to align secondary reads: 4
+  Max number of stacks allowed per de novo locus: 3
+  Deleveraging algorithm: disabled
+  Removal algorithm: enabled
+  Model type: SNP
+  Alpha significance level for model: 0.05
+  Gapped alignments: disabled
+Parsing stacks_inputs/PopA_01.fq
+Loading RAD-Tags...done
+Loaded 66 RAD-Tags.
+  Inserted 7 elements into the RAD-Tags hash map.
+  0 reads contained uncalled nucleotides that were modified.
+4 initial stacks were populated; 3 stacks were set aside as secondary reads.
+Initial coverage mean: 15.75; Std Dev: 7.46241; Max: 27
+Deleveraging trigger: 23; Removal trigger: 31
+Calculating distance for removing repetitive stacks.
+  Distance allowed between stacks: 1; searching with a k-mer length of 47 (48 k-mers per read); 1 k-mer hits required.
+Removing repetitive stacks.
+  Removed 0 stacks.
+  4 stacks remain for merging.
+Post-Repeat Removal, coverage depth Mean: 15.75; Std Dev: 7.46241; Max: 27
+Calculating distance between stacks...
+  Distance allowed between stacks: 2; searching with a k-mer length of 31 (64 k-mers per read); 2 k-mer hits required.
+Merging stacks, maximum allowed distance: 2 nucleotide(s)
+  4 stacks merged into 3 loci; deleveraged 0 loci; blacklisted 0 loci.
+After merging, coverage depth Mean: 21; Std Dev: 4.24264; Max: 27
+Merging remainder radtags
+  3 remainder sequences left to merge.
+  Distance allowed between stacks: 4; searching with a k-mer length of 17 (78 k-mers per read); 10 k-mer hits required.
+  Matched 3 remainder reads; unable to match 0 remainder reads.
+After remainders merged, coverage depth Mean: 22; Std Dev: 4.32049; Max: 28
+Calling final consensus sequences, invoking SNP-calling model...
+Number of utilized reads: 66
+Writing loci, SNPs, and alleles to 'stacks_outputs/'...
+  Refetching sequencing IDs from stacks_inputs/PopA_01.fq... read 66 sequence IDs.
+done.
+ustacks is done.