Mercurial > repos > iuc > stacks_pstacks
changeset 8:f42f9ae6d109 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit dc23703c260d004a28fe24a2a7c00cb4371bc32e
author | iuc |
---|---|
date | Thu, 27 Apr 2017 04:17:39 -0400 |
parents | 880c3cb5a5a6 |
children | 7ebc7d229deb |
files | macros.xml stacks_pstacks.xml test-data/demultiplexed/PopA_01.1.fq.gzip test-data/denovo_map/popmap_cstacks.tsv test-data/procrad/R1.fq.gzip test-data/ustacks/ustacks.out |
diffstat | 6 files changed, 91 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Fri Apr 07 11:47:36 2017 -0400 +++ b/macros.xml Thu Apr 27 04:17:39 2017 -0400 @@ -2,14 +2,14 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="1.42">stacks</requirement> + <requirement type="package" version="1.46">stacks</requirement> <requirement type="package" version="1.2.10">velvet</requirement> - <container type="docker">quay.io/biocontainers/stacks:1.42--2</container> + <requirement type="package" version="1.1">stacks_summary</requirement> <yield/> </requirements> </xml> - <token name="@WRAPPER_VERSION@">1.42</token> + <token name="@WRAPPER_VERSION@">1.46</token> <xml name="stdio"> <stdio> @@ -90,6 +90,7 @@ <option value="bsaHI">bsaHI</option> <option value="hpaII">hpaII</option> <option value="ncoI">ncoI</option> + <option value="ApaLI">ApaLI</option> </xml> <xml name="cross_types"> @@ -100,6 +101,19 @@ <option value="GEN">GEN (generic, unspecific to any map type)</option> </xml> + <token name="@CLEAN_EXT@"> + <![CDATA[ + #from os.path import splitext + #import re + #def clean_ext($identifier) + #while $identifier.endswith(('.1', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam')) + #set $identifier = splitext($identifier)[0] + #end while +$identifier#slurp + #end def + ]]> + </token> + <token name="@NORM_GENOTYPES_OUTPUT_LIGHT@"> <![CDATA[ ## We need to do this as the output file names contains the value of an option (min progeny)
--- a/stacks_pstacks.xml Fri Apr 07 11:47:36 2017 -0400 +++ b/stacks_pstacks.xml Thu Apr 27 04:17:39 2017 -0400 @@ -6,26 +6,22 @@ <expand macro="requirements"/> <expand macro="stdio"/> <command><![CDATA[ - #from os.path import splitext - #import re + + @CLEAN_EXT@ mkdir stacks_inputs stacks_outputs && - #if $sample.is_of_type('sam'): - #set $data_path = splitext($sample.element_identifier)[0] - #set $data_path = re.sub(r'\.1$', '', $data_path) - #set $data_path = "stacks_inputs/" + $data_path + ".sam" + #if $sample.is_of_type('sam') + #set $data_path = "stacks_inputs/" + $clean_ext($sample.element_identifier) + ".sam" #set inputype = "sam" - #else: - #set $data_path = splitext($sample.element_identifier)[0] - #set $data_path = re.sub(r'\.1$', '', $data_path) - #set $data_path = "stacks_inputs/" + $data_path + ".bam" + #else + #set $data_path = "stacks_inputs/" + $clean_ext($sample.element_identifier) + ".bam" #set inputype = "bam" #end if - ln -s "${sample}" "${data_path}" + ln -s '${sample}' '${data_path}' && @@ -42,13 +38,17 @@ -m $m + --max_clipped $max_clipped + --min_mapq $min_mapq + $keep_sec_alns + ## snp_model - #if str( $snp_options.select_model.model_type) == "bounded": + #if str( $snp_options.select_model.model_type) == "bounded" --model_type bounded --bound_low $snp_options.select_model.bound_low --bound_high $snp_options.select_model.bound_high --alpha $snp_options.select_model.alpha - #else if str( $snp_options.select_model.model_type) == "snp": + #else if str( $snp_options.select_model.model_type) == "snp" --model_type snp --alpha $snp_options.select_model.alpha #else @@ -58,19 +58,28 @@ -o stacks_outputs - > pstacks.log 2>&1 + 2>&1 | tee pstacks.log ## If input is in bam format, stacks will output gzipped files (no option to control this) && if ls stacks_outputs/*.gz > /dev/null 2>&1; then gunzip stacks_outputs/*.gz; fi + + && + + stacks_summary.py --stacks-prog pstacks --res-dir stacks_outputs --logfile pstacks.log --summary stacks_outputs/summary.html ]]></command> <inputs> - <param name="sample" argument="-f" format="fastqsanger,fasta" type="data" label="Input short reads from an individual" /> + <param name="sample" argument="-f" format="sam,bam" type="data" label="Input short reads from an individual" /> <param name="sample_id" argument="-i" type="integer" value="" label="Give a unique numeric ID to this sample"/> <param name="m" argument="-m" type="integer" value="1" label="Minimum depth of coverage required to create a stack"/> + <param name="max_clipped" argument="--max_clipped" type="float" value="0.15" min="0.0" max="1.0" label="Alignments with more than this fraction of soft-clipped bases are discarded"/> + + <param name="min_mapq" argument="--min_mapq" type="integer" value="10" label="Minimum required mapping quality"/> + + <param name="keep_sec_alns" argument="--keep_sec_alns" type="boolean" checked="false" truevalue="--keep_sec_alns" falsevalue="" label="Keep secondary alignments" /> <!-- SNP Model options --> <section name="snp_options" title="SNP Model Options (pstacks options)" expanded="False"> @@ -80,6 +89,9 @@ <outputs> <data format="txt" name="output_log" label="pstacks.log with ${tool.name} on ${on_string}" from_work_dir="pstacks.log" /> + + <data format="html" name="output_summary" label="Summary from ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/summary.html" /> + <collection name="tabs" type="list" label="Stacks from ${on_string}"> <discover_datasets pattern="(?P<name>.+\.tags)\.tsv$" ext="tabular" directory="stacks_outputs" /> <discover_datasets pattern="(?P<name>.+\.snps)\.tsv$" ext="tabular" directory="stacks_outputs" /> @@ -98,6 +110,11 @@ <has_text text="done." /> </assert_contents> </output> + <output name="output_summary"> + <assert_contents> + <has_text text="Stacks Statistics" /> + </assert_contents> + </output> <output_collection name="tabs"> <element name="PopA_01.tags">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/denovo_map/popmap_cstacks.tsv Thu Apr 27 04:17:39 2017 -0400 @@ -0,0 +1,1 @@ +PopA_01 myPopA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ustacks/ustacks.out Thu Apr 27 04:17:39 2017 -0400 @@ -0,0 +1,41 @@ +ustacks parameters selected: + Sample ID: 1 + Min depth of coverage to create a stack: 2 + Max distance allowed between stacks: 2 + Max distance allowed to align secondary reads: 4 + Max number of stacks allowed per de novo locus: 3 + Deleveraging algorithm: disabled + Removal algorithm: enabled + Model type: SNP + Alpha significance level for model: 0.05 + Gapped alignments: disabled +Parsing stacks_inputs/PopA_01.fq +Loading RAD-Tags...done +Loaded 66 RAD-Tags. + Inserted 7 elements into the RAD-Tags hash map. + 0 reads contained uncalled nucleotides that were modified. +4 initial stacks were populated; 3 stacks were set aside as secondary reads. +Initial coverage mean: 15.75; Std Dev: 7.46241; Max: 27 +Deleveraging trigger: 23; Removal trigger: 31 +Calculating distance for removing repetitive stacks. + Distance allowed between stacks: 1; searching with a k-mer length of 47 (48 k-mers per read); 1 k-mer hits required. +Removing repetitive stacks. + Removed 0 stacks. + 4 stacks remain for merging. +Post-Repeat Removal, coverage depth Mean: 15.75; Std Dev: 7.46241; Max: 27 +Calculating distance between stacks... + Distance allowed between stacks: 2; searching with a k-mer length of 31 (64 k-mers per read); 2 k-mer hits required. +Merging stacks, maximum allowed distance: 2 nucleotide(s) + 4 stacks merged into 3 loci; deleveraged 0 loci; blacklisted 0 loci. +After merging, coverage depth Mean: 21; Std Dev: 4.24264; Max: 27 +Merging remainder radtags + 3 remainder sequences left to merge. + Distance allowed between stacks: 4; searching with a k-mer length of 17 (78 k-mers per read); 10 k-mer hits required. + Matched 3 remainder reads; unable to match 0 remainder reads. +After remainders merged, coverage depth Mean: 22; Std Dev: 4.32049; Max: 28 +Calling final consensus sequences, invoking SNP-calling model... +Number of utilized reads: 66 +Writing loci, SNPs, and alleles to 'stacks_outputs/'... + Refetching sequencing IDs from stacks_inputs/PopA_01.fq... read 66 sequence IDs. +done. +ustacks is done.