Repository revision
6:7b72fde3d27e

Repository 'stacks2_gstacks'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks

Stacks2: gstacks tool metadata
Miscellaneous
Call variants, genotypes and haplotypes
stacks2_gstacks
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy3
2.55+galaxy3
process_radtags -h |& grep process_radtags | head -n 1 | cut -d" " -f 2
True
Version lineage of this tool (guids ordered most recent to oldest)
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy4
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy3 (this tool)
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy2
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy1
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy0
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.53+galaxy0
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.4+galaxy1
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.4+galaxy0
stacks2_gstacks
Requirements (dependencies defined in the <requirements> tag set)
name version type
stacks 2.55 package
python 3.7 package
findutils 4.6.0 package
samtools 1.13 package
Additional information about this tool
#from os.path import splitext
    #import re

    ## clean_ext: strip known read number / sequence / alignment / compression
    ## suffixes from a data set identifier
    ##
    ## inputs
    ## - identifier name of the data set (string)
    ## output (rendered as the text of the def)
    ## - the identifier with all trailing '.1', '.2', '.fa', '.fq', '.fasta',
    ##   '.fastq', '.gz', '.gzip', '.sam' and '.bam' suffixes removed
    #def clean_ext($identifier)
        #while $identifier.endswith(('.1', '.2', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam'))
            #set $stripped = splitext($identifier)[0]
            ## splitext() keeps a leading dot as part of the root (e.g. ".bam"),
            ## so nothing is removed in that case: stop instead of looping forever
            #if $stripped == $identifier
                #break
            #end if
            #set $identifier = $stripped
        #end while
$identifier#slurp
    #end def

    ## fastq_input_foo: determine link command, link path, sample name and
    ## input type for one data set (or one direction of a pair)
    ##
    ## inputs
    ## - sample data set / pair
    ## - read_direction key used to select the data set if sample is a collection
    ## - infix string inserted between the sample name and the extension
    ## return (link_command, data_path, name, inputype)
    ## - link_command bash command ("ln -s ... &&") to link the data set
    ## - data_path file name of the link (stacks_inputs/NAME+INFIX.EXT)
    ## - name cleaned element identifier of the sample
    ## - inputype "fastq", "gzfastq", "fasta", "gzfasta" or "UNKNOWN"
    #def fastq_input_foo( $sample, $read_direction="", $infix="" )
        #set $name = $clean_ext($sample.element_identifier)
        #if $sample.is_collection:
            #set $cur_sample=$sample[$read_direction]
        #else:
            #set $cur_sample=$sample
        #end if

        #if $cur_sample.is_of_type('fastqsanger')
            #set $ext =  "fastq"
            #set $inputype = "fastq"
        #else if $cur_sample.is_of_type('fastqsanger.gz')
            #set $ext = "fastq.gz"
            #set $inputype = "gzfastq"
        #else if $cur_sample.is_of_type('fasta')
            #set $ext = "fasta"
            #set $inputype = "fasta"
        #else if $cur_sample.is_of_type('fasta.gz')
            #set $ext = "fasta.gz"
            #set $inputype = "gzfasta"
        #else
            ## unrecognised datatype: $ext still has to be defined, otherwise
            ## building the path below fails on an undefined variable and the
            ## "UNKNOWN" input type can never be returned to the caller
            #set $ext = "unknown"
            #set $inputype = "UNKNOWN"
        #end if
        #set $data_path = "stacks_inputs/"+$name+$infix+"."+$ext
        #set $link_cmd = "ln -s '%s' '%s' &&" % ($cur_sample, $data_path)
        #return ($link_cmd, $data_path, $name, $inputype)
    #end def

    ## fastq_input_batch: build the link command, the link path(s) and the
    ## input type for one sample of a batch tool
    ##
    ## inputs
    ## - sample data set / pair
    ## - type "single" / "paired"
    ## return (link_command, fwd_path, rev_path, inputype)
    ## - link_command bash command(s) to link the data set(s)
    ## - fwd_path file name of the link to the forward data set
    ## - rev_path file name of the link to the reverse data set ("" if type is "single")
    ## - inputype input type as used in stacks ([gz]fast(a|q))
    #def fastq_input_batch($sample, $type)
        #if $type == "single"
            #set $single = $fastq_input_foo($sample, "", "")
            #return ($single[0], $single[1], "", $single[3])
        #end if
        ## every other type is handled as a forward / reverse pair
        #set $fwd = $fastq_input_foo($sample, "forward", ".1")
        #set $rev = $fastq_input_foo($sample, "reverse", ".2")
        ## the input type reported is the one determined for the reverse data set
        #return ($fwd[0]+$rev[0], $fwd[1], $rev[1], $rev[3])
    #end def

    ## fastq_input_nonbatch: build the link command and the input type
    ## for non-batch tools (procrad, shortreads, denovomap: the former need R[12]_
    ## and the latter needs .[12])
    ##
    ## inputs
    ## - samples list of data sets / pairs
    ## - type "single" / "paired"
    ## - infix_pattern pattern for the infix of the files (needs to contain %d which is replaced by 1/2)
    ## return (link_command, inputype)
    ## - link_command concatenated bash command(s) to link all data sets
    ## - inputype input type as used in stacks ([gz]fast(a|q)), taken from the
    ##   data set that was processed last
    #def fastq_input_nonbatch( $samples, $type, $infix_pattern )
        #set $link_command = ""
        #for $sample in $samples
            #if $type == "single"
                #set $linked = $fastq_input_foo($sample, "", "")
                #set $link_command = $link_command + $linked[0]
            #else
                #set $linked = $fastq_input_foo($sample, "forward", $infix_pattern % (1))
                #set $link_command = $link_command + $linked[0]
                #set $linked = $fastq_input_foo($sample, "reverse", $infix_pattern % (2))
                #set $link_command = $link_command + $linked[0]
            #end if
            #set $inputype = $linked[3]
        #end for
        #return ($link_command, $inputype)
    #end def
    

## create the working directories: bam_inputs receives the links to the
## input bam files, stacks_outputs is the gstacks output directory (-O)
mkdir bam_inputs stacks_outputs &&

## annoyingly gstacks creates stacks_outputs/gstacks.log
## instead of just writing to stderr as the other tools
## hence the log data set is linked to that file name before gstacks runs
#if $output_log
    ln -s '$output_log' stacks_outputs/gstacks.log &&
#end if

#if $mode_cond.mode_select == "denovo" and not $popmap:
    ## denovo mode without population map: exactly one bam file is accepted
    ## and linked as bam_inputs/catalog.bam
    ## since collections have no len .. yet
    #try:
        #set count = len($input_bam)
    #except:
        #set count = len($input_bam.keys())
    #end try
    #if count == 1:
        #for $bam in $input_bam:
            ln -s '$bam' bam_inputs/catalog.bam &&
        #end for
    #else
        >&2 echo "exactly one (merged) bam file is needed in denovo mode if no population map is given" &&
        exit 1 &&
    #end if
#else
    ## all other cases: link every bam data set under its cleaned identifier
    ## and collect the corresponding -B arguments in bamlist
    #set $bamlist = ""
    #for $bam in $input_bam:
        #if $bam.is_of_type('bam')
            #set $filename = $clean_ext($bam.element_identifier)+".bam"
            ## the link name is quoted (like the -B argument below) so that
            ## identifiers containing spaces or shell metacharacters do not
            ## break the command line
            ln -s '$bam' 'bam_inputs/$filename' &&
            #set bamlist += " -B 'bam_inputs/"+$filename+"'"
        #end if
    #end for
#end if

## the gstacks call: mode specific options first, then population map,
## output directory, number of threads and the model options
gstacks

#set $adv = $mode_cond.advanced_cond
#if $mode_cond.mode_select == "denovo"
    -P bam_inputs
    $mode_cond.ignore_pe_reads
    #if $adv.advanced_select == "yes"
        --kmer-length $adv.kmer_length
        --max-debruijn-reads $adv.max_debruijn_reads
        --min-kmer-cov $adv.min_kmer_cov
        $adv.write_alignments
    #end if
#else
    ## the input is either the bam_inputs directory (together with the
    ## population map given below) or the explicit list of -B arguments
    #if $popmap
        -I bam_inputs
    #else
        $bamlist
    #end if
    #set $paired = $mode_cond.paired_cond
    #if $paired.paired_select == ''
        $paired.rm_unpaired_reads
        $paired.rm_pcr_duplicates
    #else
        $paired.paired_select
    #end if
    #if $adv.advanced_select == "yes"
        --min-mapq $adv.min_mapq
        --max-clipped $adv.max_clipped
        --max-insert-len $adv.max_insert_len
        $adv.details
        --phasing-cooccurrences-thr-range $adv.phasing_cooccurrences_thr_min,$adv.phasing_cooccurrences_thr_max
        $adv.phasing_dont_prune_hets
    #end if
#end if
#if $popmap
    -M '$popmap'
#end if
-O stacks_outputs
-t \${GALAXY_SLOTS:-1}

## model options
--model $model_cond.model
--var-alpha $model_cond.var_alpha
--gt-alpha $model_cond.gt_alpha


## the bam files generated by gstacks (--write-alignments) are seemingly buggy
## (https://groups.google.com/d/msg/stacks-users/CazwJY1DPGA/7vuahiB2GgAJ),
## so they are fixed for now by piping them through samtools view (ignoring its
## exit code and discarding its stderr output); this adds the samtools requirement.
## For later versions where this is fixed the output bam files could just be moved
## to stacks_outputs, if this is still necessary.
#if $mode_cond.mode_select == "denovo" and $mode_cond.advanced_cond.advanced_select == "yes" and $mode_cond.advanced_cond.write_alignments
    #if $popmap:
        ## with a population map: rewrite every stacks_outputs/*alns.bam in place
        && for b in stacks_outputs/*alns.bam; do (samtools view --no-PG -b "\$b" || true) 2> /dev/null > tmp && mv tmp "\$b"; done
    #else
        ## without a population map: rewrite stacks_outputs/alignments.bam in place
        && (samtools view --no-PG -b stacks_outputs/alignments.bam || true) 2> /dev/null > tmp && mv tmp stacks_outputs/alignments.bam
    #end if
#end if


    ## the catalog.calls output is a gzip-ed vcf; extract it
    ## to make it usable in Galaxy (with the downside that it
    ## needs to be gzip-ed again for downstream calls like populations)
    && gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf
    

## TODO extract the individual distributions from stacks_outputs/gstacks.log.distribs
## (alternative: an extra tool), e.g.
## for i in \$(stacks-dist-extract stacks_outputs/gstacks.log.distribs)
## do
##     stacks-dist-extract stacks_outputs/gstacks.log.distribs \$i > stacks_outputs/gstacks.log.\$i.tsv
## done
## TODO make this an optional output collection
    
None
False
Functional tests
name inputs outputs required files
Test-1 input_bam: ['tsv2bam/PopA_01.matches.bam', 'tsv2bam/PopA_02.matches.bam']
popmap: denovo_map/popmap_cstacks.tsv
mode_cond|mode_select: denovo
add_log_distribs: True
add_log: True
name: value
name: value
tsv2bam/PopA_01.matches.bam
tsv2bam/PopA_02.matches.bam
denovo_map/popmap_cstacks.tsv
value
Test-2 input_bam: ['tsv2bam/PopA_01.matches.bam', 'tsv2bam/PopA_02.matches.bam']
popmap: denovo_map/popmap_cstacks.tsv
mode_cond|advanced_cond|write_alignments: True
mode_cond|advanced_cond|advanced_select: yes
mode_cond|mode_select: denovo
add_log: True
name: value
tsv2bam/PopA_01.matches.bam
tsv2bam/PopA_02.matches.bam
denovo_map/popmap_cstacks.tsv
value
Test-3 input_bam: tsv2bam/PopA_01.matches.bam
mode_cond|ignore_pe_reads: True
mode_cond|advanced_cond|kmer_length: 23
mode_cond|advanced_cond|max_debruijn_reads: 666
mode_cond|advanced_cond|min_kmer_cov: 3
mode_cond|advanced_cond|write_alignments: True
mode_cond|advanced_cond|advanced_select: yes
mode_cond|mode_select: denovo
model_cond|var_alpha: 0.1
model_cond|gt_alpha: 0.1
model_cond|model: marukihigh
add_log: True
name: value
name: value
tsv2bam/PopA_01.matches.bam
value
Test-4 input_bam: ['tsv2bam/PopA_01.bam', 'tsv2bam/PopA_02.bam']
mode_cond|paired_cond|rm_pcr_duplicates: True
mode_cond|paired_cond|paired_select:
mode_cond|mode_select: refbased
add_log: True
tsv2bam/PopA_01.bam
tsv2bam/PopA_02.bam
Test-5 input_bam: ['tsv2bam/PopA_01.bam', 'tsv2bam/PopA_02.bam']
popmap: denovo_map/popmap_cstacks.tsv
mode_cond|paired_cond|paired_select: --unpaired
mode_cond|advanced_cond|min_mapq: 23
mode_cond|advanced_cond|max_clipped: 0.23
mode_cond|advanced_cond|max_insert_len: 666
mode_cond|advanced_cond|details: True
mode_cond|advanced_cond|phasing_cooccurrences_thr_min: 2
mode_cond|advanced_cond|phasing_cooccurrences_thr_max: 3
mode_cond|advanced_cond|phasing_dont_prune_hets: True
mode_cond|advanced_cond|advanced_select: yes
mode_cond|mode_select: refbased
model_cond|var_alpha: 0.1
model_cond|gt_alpha: 0.1
model_cond|model: snp
add_log: True
name: value
tsv2bam/PopA_01.bam
tsv2bam/PopA_02.bam
denovo_map/popmap_cstacks.tsv
value
Test-6 input_bam: ['tsv2bam/PopA_01.bam', 'tsv2bam/PopA_02.bam']
mode_cond|paired_cond|paired_select: --ignore-pe-reads
mode_cond|mode_select: refbased
add_log: True
name: value
tsv2bam/PopA_01.bam
tsv2bam/PopA_02.bam
value