Mercurial > repos > iuc > stacks_refmap

--- a/macros.xml	Fri Apr 07 11:46:59 2017 -0400
+++ b/macros.xml	Thu Apr 27 04:17:01 2017 -0400
@@ -2,14 +2,14 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="1.42">stacks</requirement>
+            <requirement type="package" version="1.46">stacks</requirement>
             <requirement type="package" version="1.2.10">velvet</requirement>
-            <container type="docker">quay.io/biocontainers/stacks:1.42--2</container>
+            <requirement type="package" version="1.1">stacks_summary</requirement>
             <yield/>
         </requirements>
     </xml>

-    <token name="@WRAPPER_VERSION@">1.42</token>
+    <token name="@WRAPPER_VERSION@">1.46</token>

     <xml name="stdio">
         <stdio>
@@ -90,6 +90,7 @@
         <option value="bsaHI">bsaHI</option>
         <option value="hpaII">hpaII</option>
         <option value="ncoI">ncoI</option>
+        <option value="ApaLI">ApaLI</option>
     </xml>

     <xml name="cross_types">
@@ -100,6 +101,28 @@
         <option value="GEN">GEN (generic, unspecific to any map type)</option>
     </xml>

+    <!--
+        The CLEAN_EXT token defines the Cheetah function clean_ext(identifier), which
+        repeatedly strips a trailing '.1' or sequence/alignment extension from a
+        dataset identifier so callers can append the extension they need.
+        The suffix is cut with rsplit() instead of os.path.splitext(): splitext()
+        leaves names made only of an extension (e.g. '.fa') untouched, which
+        would keep the while loop from ever terminating.
+        The imports are kept because wrappers expanding this token may use them.
+    -->
+    <token name="@CLEAN_EXT@">
+        <![CDATA[
+        #from os.path import splitext
+        #import re
+        #def clean_ext($identifier)
+            #while $identifier.endswith(('.1', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam'))
+                #set $identifier = $identifier.rsplit('.', 1)[0]
+            #end while
+$identifier#slurp
+        #end def
+        ]]>
+    </token>
+
     <token name="@NORM_GENOTYPES_OUTPUT_LIGHT@">
         <![CDATA[
         ## We need to do this as the output file names contains the value of an option (min progeny)
--- a/stacks_refmap.xml	Fri Apr 07 11:46:59 2017 -0400
+++ b/stacks_refmap.xml	Thu Apr 27 04:17:01 2017 -0400
@@ -6,54 +6,42 @@
     <expand macro="requirements"/>
     <expand macro="stdio"/>
     <command><![CDATA[
-        #from os.path import splitext
-        #import re
+
+        @CLEAN_EXT@

-        #if str( $options_usage.rad_analysis_type ) == "genetic":
-            #for $input_parent in $options_usage.parent_alignments:
-                #if $input_parent.is_of_type('sam'):
-                    #set $data_path = splitext($input_parent.element_identifier)[0]
-                    #set $data_path = re.sub(r'\.1$', '', $data_path)
-                    #set $data_path = $data_path + ".sam"
-                #else:
-                    #set $data_path = splitext($input_parent.element_identifier)[0]
-                    #set $data_path = re.sub(r'\.1$', '', $data_path)
-                    #set $data_path = $data_path + ".bam"
+        #if str( $options_usage.rad_analysis_type ) == "genetic"
+            #for $input_parent in $options_usage.parent_alignments
+                #if $input_parent.is_of_type('sam')
+                    #set $data_path = $clean_ext($input_parent.element_identifier) + ".sam"
+                #else
+                    #set $data_path = $clean_ext($input_parent.element_identifier) + ".bam"
                 #end if

-                ln -s "${input_parent}" "${data_path}" &&
+                ln -s '${input_parent}' '${data_path}' &&
             #end for

-            #for $input_progeny in $options_usage.progeny_alignments:
+            #for $input_progeny in $options_usage.progeny_alignments

-                #if $input_progeny:
-                    #if $input_progeny.is_of_type('sam'):
-                        #set $data_path = splitext($input_progeny.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".sam"
-                    #else:
-                        #set $data_path = splitext($input_progeny.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".bam"
+                #if $input_progeny
+                    #if $input_progeny.is_of_type('sam')
+                        #set $data_path = $clean_ext($input_progeny.element_identifier) + ".sam"
+                    #else
+                        #set $data_path = $clean_ext($input_progeny.element_identifier) + ".bam"
                     #end if

-                    ln -s "${input_progeny}" "${data_path}" &&
+                    ln -s '${input_progeny}' '${data_path}' &&
                 #end if
             #end for
-        #else:
-            #for $input_indiv in $options_usage.individual_sample:
+        #else
+            #for $input_indiv in $options_usage.individual_sample

-                #if $input_indiv.is_of_type('sam'):
-                    #set $data_path = splitext($input_indiv.element_identifier)[0]
-                    #set $data_path = re.sub(r'\.1$', '', $data_path)
-                    #set $data_path = $data_path + ".sam"
-                #else:
-                    #set $data_path = splitext($input_indiv.element_identifier)[0]
-                    #set $data_path = re.sub(r'\.1$', '', $data_path)
-                    #set $data_path = $data_path + ".bam"
+                #if $input_indiv.is_of_type('sam')
+                    #set $data_path = $clean_ext($input_indiv.element_identifier) + ".sam"
+                #else
+                    #set $data_path = $clean_ext($input_indiv.element_identifier) + ".bam"
                 #end if

-                ln -s "${input_indiv}" "${data_path}" &&
+                ln -s '${input_indiv}' '${data_path}' &&
             #end for
         #end if

@@ -65,60 +53,48 @@

             -T \${GALAXY_SLOTS:-1}

-            #if str( $options_usage.rad_analysis_type ) == "genetic":
-                #for $input_parent in $options_usage.parent_alignments:
-                    #if $input_parent.is_of_type('sam'):
-                        #set $data_path = splitext($input_parent.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".sam"
-                    #else:
-                        #set $data_path = splitext($input_parent.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".bam"
+            #if str( $options_usage.rad_analysis_type ) == "genetic"
+                #for $input_parent in $options_usage.parent_alignments
+                    #if $input_parent.is_of_type('sam')
+                        #set $data_path = $clean_ext($input_parent.element_identifier) + ".sam"
+                    #else
+                        #set $data_path = $clean_ext($input_parent.element_identifier) + ".bam"
                     #end if

-                    -p "${data_path}"
+                    -p '${data_path}'
                 #end for

                 -A $options_usage.cross_type

-                #for $input_progeny in $options_usage.progeny_alignments:
-                    #if $input_progeny:
-                        #if $input_progeny.is_of_type('sam'):
-                            #set $data_path = splitext($input_progeny.element_identifier)[0]
-                            #set $data_path = re.sub(r'\.1$', '', $data_path)
-                            #set $data_path = $data_path + ".sam"
-                        #else:
-                            #set $data_path = splitext($input_progeny.element_identifier)[0]
-                            #set $data_path = re.sub(r'\.1$', '', $data_path)
-                            #set $data_path = $data_path + ".bam"
+                #for $input_progeny in $options_usage.progeny_alignments
+                    #if $input_progeny
+                        #if $input_progeny.is_of_type('sam')
+                            #set $data_path = $clean_ext($input_progeny.element_identifier) + ".sam"
+                        #else
+                            #set $data_path = $clean_ext($input_progeny.element_identifier) + ".bam"
                         #end if

-                        -r "${data_path}"
+                        -r '${data_path}'
                     #end if
                 #end for
-            #else:
-                #for $input_indiv in $options_usage.individual_sample:
+            #else
+                #for $input_indiv in $options_usage.individual_sample

-                    #if $input_indiv.is_of_type('sam'):
-                        #set $data_path = splitext($input_indiv.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".sam"
-                    #else:
-                        #set $data_path = splitext($input_indiv.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".bam"
+                    #if $input_indiv.is_of_type('sam')
+                        #set $data_path = $clean_ext($input_indiv.element_identifier) + ".sam"
+                    #else
+                        #set $data_path = $clean_ext($input_indiv.element_identifier) + ".bam"
                     #end if

-                    -s "${data_path}"
+                    -s '${data_path}'
                 #end for
-                -O "$options_usage.popmap"
+                -O '$options_usage.popmap'
             #end if

-            #if str($m):
+            #if str($m)
                 -m $m
             #end if
-            #if str($P):
+            #if str($P)
                 -P $P
             #end if

@@ -129,22 +105,29 @@
             -S

             ## snp_model
-            #if str( $snp_options.select_model.model_type) == "bounded":
+            #if str( $snp_options.select_model.model_type) == "bounded"
                 --bound_low $snp_options.select_model.bound_low
                 --bound_high $snp_options.select_model.bound_high
                 --alpha $snp_options.select_model.alpha
-            #else if str( $snp_options.select_model.model_type) == "snp":
+            #else if str( $snp_options.select_model.model_type) == "snp"
                 --alpha $snp_options.select_model.alpha
             #end if

             -o stacks_outputs

-            #if str( $options_usage.rad_analysis_type ) == "genetic":
+            #if str( $options_usage.rad_analysis_type ) == "genetic"
                 @NORM_GENOTYPES_OUTPUT_LIGHT@
             #end if

             ## If input is in bam format, stacks will output gzipped files (no option to control this)
             && if ls stacks_outputs/*.gz > /dev/null 2>&1; then gunzip stacks_outputs/*.gz; fi
+
+            &&
+
+            stacks_summary.py --stacks-prog ref_map.pl --res-dir stacks_outputs --logfile stacks_outputs/ref_map.log --summary stacks_outputs/summary.html
+            #if str( $options_usage.rad_analysis_type ) == "population"
+                --pop-map '$options_usage.popmap'
+            #end if
     ]]></command>

     <inputs>
@@ -178,6 +161,8 @@
     <outputs>
         <data format="txt" name="output_log" label="ref_map.log with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/ref_map.log" />

+        <data format="html" name="output_summary" label="Summary from ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/summary.html" />
+
         <data format="tabular" name="catalogtags" label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv" />
         <data format="tabular" name="catalogsnps" label="Catalog model calls (snps) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv" />
         <data format="tabular" name="catalogalleles" label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv" />
@@ -217,6 +202,11 @@
                     <has_text text="ref_map.pl completed" />
                 </assert_contents>
             </output>
+            <output name="output_summary">
+                <assert_contents>
+                    <has_text text="Stacks Statistics" />
+                </assert_contents>
+            </output>

             <!-- catalog -->
             <output name="catalogsnps">
@@ -306,6 +296,11 @@
                     <has_text text="ref_map.pl completed" />
                 </assert_contents>
             </output>
+            <output name="output_summary">
+                <assert_contents>
+                    <has_text text="Stacks Statistics" />
+                </assert_contents>
+            </output>

             <!-- catalog -->
             <output name="catalogsnps">
@@ -395,6 +390,11 @@
                     <has_text text="ref_map.pl completed" />
                 </assert_contents>
             </output>
+            <output name="output_summary">
+                <assert_contents>
+                    <has_text text="Stacks Statistics" />
+                </assert_contents>
+            </output>

             <!-- catalog -->
             <output name="catalogtags">
Binary file test-data/demultiplexed/PopA_01.1.fq.gzip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/denovo_map/popmap_cstacks.tsv	Thu Apr 27 04:17:01 2017 -0400
@@ -0,0 +1,1 @@
+PopA_01	myPopA
Binary file test-data/procrad/R1.fq.gzip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ustacks/ustacks.out	Thu Apr 27 04:17:01 2017 -0400
@@ -0,0 +1,41 @@
+ustacks parameters selected:
+  Sample ID: 1
+  Min depth of coverage to create a stack: 2
+  Max distance allowed between stacks: 2
+  Max distance allowed to align secondary reads: 4
+  Max number of stacks allowed per de novo locus: 3
+  Deleveraging algorithm: disabled
+  Removal algorithm: enabled
+  Model type: SNP
+  Alpha significance level for model: 0.05
+  Gapped alignments: disabled
+Parsing stacks_inputs/PopA_01.fq
+Loading RAD-Tags...done
+Loaded 66 RAD-Tags.
+  Inserted 7 elements into the RAD-Tags hash map.
+  0 reads contained uncalled nucleotides that were modified.
+4 initial stacks were populated; 3 stacks were set aside as secondary reads.
+Initial coverage mean: 15.75; Std Dev: 7.46241; Max: 27
+Deleveraging trigger: 23; Removal trigger: 31
+Calculating distance for removing repetitive stacks.
+  Distance allowed between stacks: 1; searching with a k-mer length of 47 (48 k-mers per read); 1 k-mer hits required.
+Removing repetitive stacks.
+  Removed 0 stacks.
+  4 stacks remain for merging.
+Post-Repeat Removal, coverage depth Mean: 15.75; Std Dev: 7.46241; Max: 27
+Calculating distance between stacks...
+  Distance allowed between stacks: 2; searching with a k-mer length of 31 (64 k-mers per read); 2 k-mer hits required.
+Merging stacks, maximum allowed distance: 2 nucleotide(s)
+  4 stacks merged into 3 loci; deleveraged 0 loci; blacklisted 0 loci.
+After merging, coverage depth Mean: 21; Std Dev: 4.24264; Max: 27
+Merging remainder radtags
+  3 remainder sequences left to merge.
+  Distance allowed between stacks: 4; searching with a k-mer length of 17 (78 k-mers per read); 10 k-mer hits required.
+  Matched 3 remainder reads; unable to match 0 remainder reads.
+After remainders merged, coverage depth Mean: 22; Std Dev: 4.32049; Max: 28
+Calling final consensus sequences, invoking SNP-calling model...
+Number of utilized reads: 66
+Writing loci, SNPs, and alleles to 'stacks_outputs/'...
+  Refetching sequencing IDs from stacks_inputs/PopA_01.fq... read 66 sequence IDs.
+done.
+ustacks is done.