Repository revision
7:a3e86cd9cac7

Repository 'stacks2_kmerfilter'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter

Stacks2: kmer filter tool metadata
Miscellaneous
Identify PCR clones
stacks2_kmerfilter
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.55+galaxy4
2.55+galaxy4
process_radtags -h |& grep process_radtags | head -n 1 | cut -d" " -f 2
True
Version lineage of this tool (guids ordered most recent to oldest)
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.55+galaxy4 (this tool)
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.55+galaxy3
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.55+galaxy2
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.55+galaxy1
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.55+galaxy0
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.53+galaxy0
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.4+galaxy1
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_kmerfilter/stacks2_kmerfilter/2.4+galaxy0
stacks2_kmerfilter
Requirements (dependencies defined in the <requirements> tag set)
name version type
stacks 2.55 package
python 3.7 package
findutils 4.6.0 package
Additional information about this tool
#from os.path import splitext
    #import re

    ## clean_ext strip read-number, sequence-format, and compression suffixes
    ## from a data set identifier
    ##
    ## inputs
    ## - identifier name of the data set (e.g. "sample.1.fq.gz")
    ## return the identifier with every trailing occurrence of one of the
    ## listed suffixes removed (e.g. "sample")
    #def clean_ext($identifier)
        #while $identifier.endswith(('.1', '.2', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam'))
            #set $stripped = splitext($identifier)[0]
            ## splitext does not treat a leading-dot name such as ".fq" as an
            ## extension and returns it unchanged; stop here instead of
            ## looping forever on such identifiers
            #if $stripped == $identifier
                #break
            #end if
            #set $identifier = $stripped
        #end while
$identifier#slurp
    #end def

    ## fastq_input_foo determine link command, link path, sample name, and
    ## input type for one data set
    ##
    ## inputs
    ## - sample data set or collection
    ## - read_direction key used to pick the data set if sample is a collection
    ##   (the callers in this file pass "forward" / "reverse")
    ## - infix string inserted between the cleaned sample name and the extension
    ## return (link_command, data_path, name, inputype)
    ## - link_command bash command (ending in "&&") that links the data set
    ## - data_path path of the link below stacks_inputs/
    ## - name element identifier of the sample with known suffixes removed
    ## - inputype input type as used in stacks ([gz]fast(a|q)) or "UNKNOWN"
    ##
    ## NOTE(review): the identifier is placed inside single quotes in the link
    ## command; presumably identifiers never contain a single quote - confirm
    #def fastq_input_foo( $sample, $read_direction="", $infix="" )
        #set $name = $clean_ext($sample.element_identifier)
        #if $sample.is_collection:
            #set $cur_sample=$sample[$read_direction]
        #else:
            #set $cur_sample=$sample
        #end if

        #if $cur_sample.is_of_type('fastqsanger')
            #set $ext =  "fastq"
            #set $inputype = "fastq"
        #else if $cur_sample.is_of_type('fastqsanger.gz')
            #set $ext = "fastq.gz"
            #set $inputype = "gzfastq"
        #else if $cur_sample.is_of_type('fasta')
            #set $ext = "fasta"
            #set $inputype = "fasta"
        #else if $cur_sample.is_of_type('fasta.gz')
            #set $ext = "fasta.gz"
            #set $inputype = "gzfasta"
        #else
            ## unsupported datatype: ext used to be left undefined here, so
            ## building data_path below failed before "UNKNOWN" could be
            ## returned; fall back to the data set's own extension
            #set $ext = $cur_sample.ext
            #set $inputype = "UNKNOWN"
        #end if
        #set $data_path = "stacks_inputs/"+$name+$infix+"."+$ext
        #set $link_cmd = "ln -s '%s' '%s' &&" % ($cur_sample, $data_path)
        #return ($link_cmd, $data_path, $name, $inputype)
    #end def

    ## fastq_input_batch build the link command(s), link path(s), and input
    ## type for one sample of a batch tool
    ##
    ## inputs
    ## - sample data set (single) or pair of data sets (paired)
    ## - type "single"; every other value is handled as paired
    ## return (link_command, fwd_path, rev_path, inputype)
    ## - link_command bash command(s) to link the data set(s)
    ## - fwd_path file name of the link to the forward (or only) data set
    ## - rev_path file name of the link to the reverse data set ("" if type=single)
    ## - inputype input type as used in stacks ([gz]fast(a|q)); for pairs the
    ##   value determined for the reverse data set is returned
    #def fastq_input_batch($sample, $type)
        #if $type != "single"
            #set ($ln_fwd, $path_fwd, $sample_name, $kind) = $fastq_input_foo($sample, "forward", ".1")
            #set ($ln_rev, $path_rev, $sample_name, $kind) = $fastq_input_foo($sample, "reverse", ".2")
            #return ($ln_fwd+$ln_rev, $path_fwd, $path_rev, $kind)
        #end if
        #set ($ln_single, $path_single, $sample_name, $kind) = $fastq_input_foo($sample, "", "")
        #return ($ln_single, $path_single, "", $kind)
    #end def

    ## fastq_input_nonbatch build the combined link command and the input type
    ## for non-batch tools (procrad, shortreads, denovomap; the first two need
    ## R[12]_ in the file name and the last one needs .[12])
    ##
    ## inputs
    ## - samples list of data sets / pairs
    ## - type "single"; every other value is handled as paired
    ## - infix_pattern pattern for the infix of the files (needs to contain %d
    ##   which is replaced by 1/2); only used for paired data
    ## return (link_command, inputype)
    ## - link_command concatenated bash command(s) to link all data sets
    ## - inputype input type as used in stacks ([gz]fast(a|q)), taken from the
    ##   data set that was processed last
    ## NOTE(review): inputype is only assigned inside the loop, so an empty
    ## samples list leaves it undefined at the return - confirm callers never
    ## pass an empty list
    #def fastq_input_nonbatch( $samples, $type, $infix_pattern )
        #set $all_links = ""
        #for $sample in $samples
            ## one (read direction, infix) entry per data set to link
            #if $type == "single"
                #set $passes = [("", "")]
            #else
                #set $passes = [("forward", $infix_pattern % (1)), ("reverse", $infix_pattern % (2))]
            #end if
            #for $direction, $infix in $passes
                #set ($one_link, $path, $name, $inputype) = $fastq_input_foo($sample, $direction, $infix)
                #set $all_links = $all_links + $one_link
            #end for
        #end for
        #return ($all_links, $inputype)
    #end def
    
## Command: link the input(s) into stacks_inputs/, run kmer_filter on them,
## then post-process the k-mer tables or move the read files to the outputs.
## The trap prints the log file to stderr on error and on exit.
trap ">&2 cat '$output_log'" err exit &&
mkdir stacks_inputs stacks_outputs &&

#set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select)
$link_command

kmer_filter
#if $input_type.input_type_select == 'single':
    -f '$fwd_path'
#else
    -1 '$fwd_path'
    -2 '$rev_path'
#end if
## TODO    $options_kmer_char.read_k_freq
-i $inputype
-o stacks_outputs
$capture
-y fastq
$options_filtering.rare
$options_filtering.abundant
--k_len $options_filtering.k_len
--max_k_freq $options_advanced_filtering.max_k_freq
## optional parameters are only passed if a value was given
#if str($options_advanced_filtering.min_lim)!="":
    --min_lim $options_advanced_filtering.min_lim
#end if
#if str($options_advanced_filtering.max_lim)!="":
    --max_lim $options_advanced_filtering.max_lim
#end if
#if str($options_normalization.normalize)!="":
    --normalize $options_normalization.normalize
#end if
#if $options_kmer_char.write_k_freq
    ## quoted like every other output path in this command
    --write-k-freq '$kfreqdist'
#end if
## TODO read_k_freq
$options_kmer_char.k_dist
#if $options_kmer_char.k_dist
    ## with k_dist the result is taken from stdout
    > '$kfreq'
#end if

#if $output_log
    2> '$output_log'
#end if

#if $options_kmer_char.k_dist 
    ## turn the header line into a comment line
    && sed -i -e 's/KmerFrequency/# KmerFrequency/' '$kfreq'
#elif $options_kmer_char.write_k_freq
    ## rewrite the header and replace the first space of each line by a tab;
    ## no trailing ";" so that the command stays valid if something is appended
    && sed -i -e 's/# Kmer Count/#Kmer\tCount/; s/ /\t/' '$kfreqdist'
#else
    ## move outputs such that Galaxy can find them
    ## if filtering is on then ...fil...fastq is created
    ## if normalization is on then ...norm...fastq is created
    ## if both are active then both files are created, but only norm is needed
    #if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="":
        #if str($options_normalization.normalize)!="":
            #set infix="norm"
        #else
            #set infix="fil"
        #end if
        #if $capture:
            #if $input_type.input_type_select == "single"
                && mv stacks_outputs/*.discards.fastq '$discarded'
            #else
                && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward'
                && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse'
            #end if
        #end if
        #if $input_type.input_type_select == "single"
            && mv stacks_outputs/*.${infix}.fastq '$clean'
        #else
            && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
            && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
        #end if
    #end if
#end if
    
None
False
Functional tests
name inputs outputs required files
Test-1 input_type|fqinputs: clonefilter/R1_0001.1.fq.gz
input_type|input_type_select: single
options_filtering|rare: True
options_filtering|abundant: True
options_filtering|k_len: 16
add_log: True
name: value
name: value
name: value
clonefilter/R1_0001.1.fq.gz
value
Test-2 input_type|fqinputs: paired collection
input_type|input_type_select: paired
capture: True
options_normalization|normalize: 1
add_log: True
name: value
clonefilter/R1_0001.1.fq.gz
clonefilter/R2_0001.2.fq.gz
value
Test-3 input_type|fqinputs: clonefilter/R1_0001.1.fq.gz
input_type|input_type_select: single
options_kmer_char|write_k_freq: True
add_log: True
name: value
name: value
clonefilter/R1_0001.1.fq.gz
value
Test-4 input_type|fqinputs: clonefilter/R1_0001.1.fq.gz
input_type|input_type_select: single
options_kmer_char|k_dist: True
add_log: False
name: value
clonefilter/R1_0001.1.fq.gz
value