Mercurial > repos > iuc > stacks_assembleperead

--- a/macros.xml	Fri Apr 07 11:48:12 2017 -0400
+++ b/macros.xml	Thu Apr 27 04:18:45 2017 -0400
@@ -2,14 +2,14 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="1.42">stacks</requirement>
+            <requirement type="package" version="1.46">stacks</requirement>
             <requirement type="package" version="1.2.10">velvet</requirement>
-            <container type="docker">quay.io/biocontainers/stacks:1.42--2</container>
+            <requirement type="package" version="1.1">stacks_summary</requirement>
             <yield/>
         </requirements>
     </xml>

-    <token name="@WRAPPER_VERSION@">1.42</token>
+    <token name="@WRAPPER_VERSION@">1.46</token>

     <xml name="stdio">
         <stdio>
@@ -90,6 +90,7 @@
         <option value="bsaHI">bsaHI</option>
         <option value="hpaII">hpaII</option>
         <option value="ncoI">ncoI</option>
+        <option value="ApaLI">ApaLI</option>
     </xml>

     <xml name="cross_types">
@@ -100,6 +101,19 @@
         <option value="GEN">GEN (generic, unspecific to any map type)</option>
     </xml>

+    <token name="@CLEAN_EXT@">
+        <![CDATA[
+        #from os.path import splitext
+        #import re
+        #def clean_ext($identifier)
+            #while $identifier.endswith(('.1', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam'))
+                #set $identifier = splitext($identifier)[0]
+            #end while
+$identifier#slurp
+        #end def
+        ]]>
+    </token>
+
     <token name="@NORM_GENOTYPES_OUTPUT_LIGHT@">
         <![CDATA[
         ## We need to do this as the output file names contains the value of an option (min progeny)
--- a/stacks_assembleperead.xml	Fri Apr 07 11:48:12 2017 -0400
+++ b/stacks_assembleperead.xml	Thu Apr 27 04:18:45 2017 -0400
@@ -11,45 +11,45 @@

         &&

-        #for $input_file in $stacks_col:
+        #for $input_file in $stacks_col
             #set $ext = ""
-            #if not str($input_file.element_identifier).endswith('.tsv'):
+            #if not str($input_file.element_identifier).endswith('.tsv')
                 #set $ext = ".tsv"
             #end if
-            ln -s "${input_file}" "stacks_inputs/${input_file.element_identifier}${ext}" &&
+            ln -s '${input_file}' 'stacks_inputs/${input_file.element_identifier}${ext}' &&
         #end for

-        #for $input_file in $reads:
+        #for $input_file in $reads
             #set $name = str($input_file.element_identifier)
             ## sort_read_pairs is expecting strange fastq names: <sample_name>.fq_2
-            #if $name.endswith('.1.fq'):
+            #if $name.endswith('.1.fq')
                 ## handle a common case
                 #set $name = $name[:-5]+".fq_1"
-            #else if $name.endswith('.2.fq'):
+            #else if $name.endswith('.2.fq')
                 ## handle a common case
                 #set $name = $name[:-5]+".fq_2"
-            #else if not $name.endswith('.fq') and not $name.endswith('.fq_2'):
+            #else if not $name.endswith('.fq') and not $name.endswith('.fq_2')
                 ## no extension, consider it's a fq_2 file
                 #set $name = $name + ".fq_2"
             #end if
-            ln -s "${input_file}" "reads/${name}" &&
+            ln -s '${input_file}' 'reads/${name}' &&
         #end for

         sort_read_pairs.pl
             -p stacks_inputs
             -s 'reads'

-            #if $whitelist:
+            #if $whitelist
                 -w '$whitelist'
             #end if

-            #if $threshold:
+            #if $threshold
                 -r $threshold
             #end if

             -o stacks_outputs

-        #if $velvet.use_velvet:
+        #if $velvet.use_velvet == "yes"
             ## remove possible empty files
             && find stacks_outputs -type f -size 0 -delete

@@ -70,21 +70,24 @@
         <param name="threshold" argument="-r" type="integer" value="" optional="true" label="Minimum number of reads by locus"/>

        <conditional name="velvet">
-            <param name="use_velvet" type="boolean" checked="false" label="Perform assembly with Velvet" help="If not selected, the tool will only produce of collection of fasta files (one per locus) containing reads ready to assemble." />
-            <when value="false"></when>
-            <when value="true">
+            <param name="use_velvet" type="select" label="Perform assembly with Velvet" help="If not selected, the tool will only produce of collection of fasta files (one per locus) containing reads ready to assemble.">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
                 <param name="contig_length" type="integer" value="200" label="Minimum length for asssembled contigs"/>
             </when>
         </conditional>
     </inputs>
     <outputs>
         <collection name="collated" type="list" label="Collated FASTA files per locus on ${on_string}">
-            <filter>not velvet['use_velvet']</filter>
+            <filter>velvet['use_velvet'] == "no"</filter>
             <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa(sta)?$" ext="fasta" directory="stacks_outputs" />
         </collection>

         <data format="fasta" name="contigs" label="Assembled contigs on ${on_string}" from_work_dir="assembled/collated.fa">
-            <filter>velvet['use_velvet']</filter>
+            <filter>velvet['use_velvet'] == "yes"</filter>
         </data>
     </outputs>

@@ -132,12 +135,12 @@
                 </collection>
             </param>
             <param name="reads" value="demultiplexed/PopA_01.2.fq,demultiplexed/PopA_02.2.fq" ftype="fastqsanger" />
-            <param name="velvet|use_velvet" value="true" />
+            <param name="velvet|use_velvet" value="yes" />
             <param name="velvet|contig_length" value="20" />

             <output name="contigs">
                 <assert_contents>
-                    <has_text text="TGTATTCTCCCATGCGACAGCAGGACATCCCATCCCCCTCTGATGTTATCAATCATAAGA" />
+                    <has_text text="|NODE_" />
                 </assert_contents>
             </output>
         </test>
Binary file test-data/demultiplexed/PopA_01.1.fq.gzip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/denovo_map/popmap_cstacks.tsv	Thu Apr 27 04:18:45 2017 -0400
@@ -0,0 +1,1 @@
+PopA_01	myPopA
Binary file test-data/procrad/R1.fq.gzip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ustacks/ustacks.out	Thu Apr 27 04:18:45 2017 -0400
@@ -0,0 +1,41 @@
+ustacks parameters selected:
+  Sample ID: 1
+  Min depth of coverage to create a stack: 2
+  Max distance allowed between stacks: 2
+  Max distance allowed to align secondary reads: 4
+  Max number of stacks allowed per de novo locus: 3
+  Deleveraging algorithm: disabled
+  Removal algorithm: enabled
+  Model type: SNP
+  Alpha significance level for model: 0.05
+  Gapped alignments: disabled
+Parsing stacks_inputs/PopA_01.fq
+Loading RAD-Tags...done
+Loaded 66 RAD-Tags.
+  Inserted 7 elements into the RAD-Tags hash map.
+  0 reads contained uncalled nucleotides that were modified.
+4 initial stacks were populated; 3 stacks were set aside as secondary reads.
+Initial coverage mean: 15.75; Std Dev: 7.46241; Max: 27
+Deleveraging trigger: 23; Removal trigger: 31
+Calculating distance for removing repetitive stacks.
+  Distance allowed between stacks: 1; searching with a k-mer length of 47 (48 k-mers per read); 1 k-mer hits required.
+Removing repetitive stacks.
+  Removed 0 stacks.
+  4 stacks remain for merging.
+Post-Repeat Removal, coverage depth Mean: 15.75; Std Dev: 7.46241; Max: 27
+Calculating distance between stacks...
+  Distance allowed between stacks: 2; searching with a k-mer length of 31 (64 k-mers per read); 2 k-mer hits required.
+Merging stacks, maximum allowed distance: 2 nucleotide(s)
+  4 stacks merged into 3 loci; deleveraged 0 loci; blacklisted 0 loci.
+After merging, coverage depth Mean: 21; Std Dev: 4.24264; Max: 27
+Merging remainder radtags
+  3 remainder sequences left to merge.
+  Distance allowed between stacks: 4; searching with a k-mer length of 17 (78 k-mers per read); 10 k-mer hits required.
+  Matched 3 remainder reads; unable to match 0 remainder reads.
+After remainders merged, coverage depth Mean: 22; Std Dev: 4.32049; Max: 28
+Calling final consensus sequences, invoking SNP-calling model...
+Number of utilized reads: 66
+Writing loci, SNPs, and alleles to 'stacks_outputs/'...
+  Refetching sequencing IDs from stacks_inputs/PopA_01.fq... read 66 sequence IDs.
+done.
+ustacks is done.