Mercurial > repos > iuc > stacks_cstacks
changeset 8:759ba5c7faca draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit dc23703c260d004a28fe24a2a7c00cb4371bc32e
| author | iuc | 
|---|---|
| date | Thu, 27 Apr 2017 04:18:23 -0400 | 
| parents | c541e62d6c56 | 
| children | ff1f282d731e | 
| files | macros.xml stacks_cstacks.xml test-data/demultiplexed/PopA_01.1.fq.gzip test-data/denovo_map/popmap_cstacks.tsv test-data/procrad/R1.fq.gzip test-data/ustacks/ustacks.out | 
| diffstat | 6 files changed, 153 insertions(+), 20 deletions(-) [+] | 
line wrap: on
 line diff
--- a/macros.xml Fri Apr 07 11:48:00 2017 -0400 +++ b/macros.xml Thu Apr 27 04:18:23 2017 -0400 @@ -2,14 +2,14 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="1.42">stacks</requirement> + <requirement type="package" version="1.46">stacks</requirement> <requirement type="package" version="1.2.10">velvet</requirement> - <container type="docker">quay.io/biocontainers/stacks:1.42--2</container> + <requirement type="package" version="1.1">stacks_summary</requirement> <yield/> </requirements> </xml> - <token name="@WRAPPER_VERSION@">1.42</token> + <token name="@WRAPPER_VERSION@">1.46</token> <xml name="stdio"> <stdio> @@ -90,6 +90,7 @@ <option value="bsaHI">bsaHI</option> <option value="hpaII">hpaII</option> <option value="ncoI">ncoI</option> + <option value="ApaLI">ApaLI</option> </xml> <xml name="cross_types"> @@ -100,6 +101,19 @@ <option value="GEN">GEN (generic, unspecific to any map type)</option> </xml> + <token name="@CLEAN_EXT@"> + <![CDATA[ + #from os.path import splitext + #import re + #def clean_ext($identifier) + #while $identifier.endswith(('.1', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam')) + #set $identifier = splitext($identifier)[0] + #end while +$identifier#slurp + #end def + ]]> + </token> + <token name="@NORM_GENOTYPES_OUTPUT_LIGHT@"> <![CDATA[ ## We need to do this as the output file names contains the value of an option (min progeny)
--- a/stacks_cstacks.xml Fri Apr 07 11:48:00 2017 -0400 +++ b/stacks_cstacks.xml Thu Apr 27 04:18:23 2017 -0400 @@ -13,28 +13,33 @@ && #set $samples = "" - #for $input_file in $input_col: + #for $input_file in $input_col #set $filename = str($input_file.element_identifier) - #if not filename.endswith('.tsv'): + #if not filename.endswith('.tsv') #set $filename = $filename + ".tsv" #end if - #if not re.search('catalog\.[a-z]+(\.tsv)?$', $filename): - ln -s "${input_file}" "stacks_inputs/$filename" && + #if not re.search('catalog\.[a-z]+(\.tsv)?$', $filename) + ln -s '${input_file}' 'stacks_inputs/$filename' && - #if $filename.endswith('.tags.tsv'): - #set samples += " -s \"stacks_inputs/" + $filename[:-9] + "\"" + #if $filename.endswith('.tags.tsv') + #set samples += " -s 'stacks_inputs/" + $filename[:-9] + "'" #end if #end if #end for cstacks + ## Batch description + -b 1 + -p \${GALAXY_SLOTS:-1} - $samples - - ## Batch description - -b 1 + #if $popmap + -P stacks_inputs -M '$popmap' + #else + $samples + -o stacks_outputs + #end if $g @@ -42,20 +47,29 @@ $include_multiple - #if $gapped.use_gapped: + #if $gapped.use_gapped == "yes" --gapped --max_gaps $gapped.max_gaps --min_aln_len $gapped.min_aln_len #end if - -o stacks_outputs + 2>&1 | tee cstacks.log - > cstacks.log 2>&1 + #if $popmap + ## When using a popmap, stacks write to the input dir + && mv stacks_inputs/batch_1.catalog.*.tsv stacks_outputs/ + #end if + + && + + stacks_summary.py --stacks-prog cstacks --res-dir stacks_outputs --logfile cstacks.log --summary stacks_outputs/summary.html ]]></command> <inputs> <param name="input_col" format="tabular,txt" type="data_collection" collection_type="list" label="Output from previous Stacks pipeline steps (e.g. denovo_map, refmap, ustacks or pstacks)" /> + <param name="popmap" type="data" format="tabular,txt" label="Population map" help="If set, the catalog will be built from samples listed in this file" optional="true" argument="-M" /> + <param name="g" argument="-g" type="boolean" checked="false" truevalue="-g" falsevalue="" label="Base catalog matching on genomic location, not sequence identity" /> <param name="n" argument="-n" type="integer" value="1" label="Number of mismatches allowed between sample tags when building the catalog"/> @@ -63,9 +77,12 @@ <param name="include_multiple" argument="-m" type="boolean" checked="false" truevalue="-m" falsevalue="" label="Include tags in the catalog that match to more than one entry" /> <conditional name="gapped"> - <param name="use_gapped" argument="--gapped" type="boolean" checked="false" label="Perform gapped alignments between stacks" /> - <when value="false"></when> - <when value="true"> + <param name="use_gapped" argument="--gapped" type="select" label="Perform gapped alignments between stacks"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"/> + <when value="yes"> <param name="max_gaps" argument="--max_gaps" type="integer" value="2" label="Number of gaps allowed between stacks before merging"/> <param name="min_aln_len" argument="--min_aln_len" type="float" value="0.8" min="0.0" max="1.0" label="Minimum length of aligned sequence in a gapped alignment"/> </when> @@ -75,6 +92,8 @@ <outputs> <data format="txt" name="output_log" label="cstacks.log with ${tool.name} on ${on_string}" from_work_dir="cstacks.log" /> + <data format="html" name="output_summary" label="Summary from ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/summary.html" /> + <data format="tabular" name="catalogtags" label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv" /> <data format="tabular" name="catalogsnps" label="Catalog model calls (snps) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv" /> <data format="tabular" name="catalogalleles" label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv" /> @@ -103,6 +122,11 @@ <has_text text="done." /> </assert_contents> </output> + <output name="output_summary"> + <assert_contents> + <has_text text="Stacks Statistics" /> + </assert_contents> + </output> <!-- catalog --> <output name="catalogtags"> @@ -138,13 +162,66 @@ <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" /> </collection> </param> - <param name="gapped|use_gapped" value="true" /> + <param name="gapped|use_gapped" value="yes" /> <output name="output_log"> <assert_contents> <has_text text="done." /> </assert_contents> </output> + <output name="output_summary"> + <assert_contents> + <has_text text="Stacks Statistics" /> + </assert_contents> + </output> + + <!-- catalog --> + <output name="catalogtags"> + <assert_contents> + <has_text text="catalog generated on" /> + </assert_contents> + </output> + <output name="catalogsnps"> + <assert_contents> + <has_text text="catalog generated on" /> + </assert_contents> + </output> + <output name="catalogalleles"> + <assert_contents> + <has_text text="catalog generated on" /> + </assert_contents> + </output> + </test> + + <test> + <param name="input_col"> + <collection type="list"> + <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" /> + <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" /> + <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" /> + <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" /> + <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" /> + <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" /> + <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" /> + <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" /> + <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" /> + <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" /> + <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" /> + </collection> + </param> + + <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> + + <output name="output_log"> + <assert_contents> + <has_text text="done." /> + </assert_contents> + </output> + <output name="output_summary"> + <assert_contents> + <has_text text="Stacks Statistics" /> + </assert_contents> + </output> <!-- catalog --> <output name="catalogtags">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/denovo_map/popmap_cstacks.tsv Thu Apr 27 04:18:23 2017 -0400 @@ -0,0 +1,1 @@ +PopA_01 myPopA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ustacks/ustacks.out Thu Apr 27 04:18:23 2017 -0400 @@ -0,0 +1,41 @@ +ustacks parameters selected: + Sample ID: 1 + Min depth of coverage to create a stack: 2 + Max distance allowed between stacks: 2 + Max distance allowed to align secondary reads: 4 + Max number of stacks allowed per de novo locus: 3 + Deleveraging algorithm: disabled + Removal algorithm: enabled + Model type: SNP + Alpha significance level for model: 0.05 + Gapped alignments: disabled +Parsing stacks_inputs/PopA_01.fq +Loading RAD-Tags...done +Loaded 66 RAD-Tags. + Inserted 7 elements into the RAD-Tags hash map. + 0 reads contained uncalled nucleotides that were modified. +4 initial stacks were populated; 3 stacks were set aside as secondary reads. +Initial coverage mean: 15.75; Std Dev: 7.46241; Max: 27 +Deleveraging trigger: 23; Removal trigger: 31 +Calculating distance for removing repetitive stacks. + Distance allowed between stacks: 1; searching with a k-mer length of 47 (48 k-mers per read); 1 k-mer hits required. +Removing repetitive stacks. + Removed 0 stacks. + 4 stacks remain for merging. +Post-Repeat Removal, coverage depth Mean: 15.75; Std Dev: 7.46241; Max: 27 +Calculating distance between stacks... + Distance allowed between stacks: 2; searching with a k-mer length of 31 (64 k-mers per read); 2 k-mer hits required. +Merging stacks, maximum allowed distance: 2 nucleotide(s) + 4 stacks merged into 3 loci; deleveraged 0 loci; blacklisted 0 loci. +After merging, coverage depth Mean: 21; Std Dev: 4.24264; Max: 27 +Merging remainder radtags + 3 remainder sequences left to merge. + Distance allowed between stacks: 4; searching with a k-mer length of 17 (78 k-mers per read); 10 k-mer hits required. + Matched 3 remainder reads; unable to match 0 remainder reads. +After remainders merged, coverage depth Mean: 22; Std Dev: 4.32049; Max: 28 +Calling final consensus sequences, invoking SNP-calling model... +Number of utilized reads: 66 +Writing loci, SNPs, and alleles to 'stacks_outputs/'... + Refetching sequencing IDs from stacks_inputs/PopA_01.fq... read 66 sequence IDs. +done. +ustacks is done.
