Repository revision
6:3459ad07928e

Repository 'stacks2_procrad'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad

Stacks2: process radtags tool metadata
Miscellaneous
the Stacks demultiplexing script
stacks2_procrad
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.55+galaxy3
2.55+galaxy3
process_radtags -h |& grep process_radtags | head -n 1 | cut -d" " -f 2
True
Version lineage of this tool (guids ordered most recent to oldest)
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.55+galaxy4
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.55+galaxy3 (this tool)
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.55+galaxy2
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.55+galaxy1
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.55+galaxy0
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.53+galaxy0
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.4+galaxy1
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_procrad/stacks2_procrad/2.4+galaxy0
stacks2_procrad
Requirements (dependencies defined in the <requirements> tag set)
name version type
stacks 2.55 package
python 3.7 package
findutils 4.6.0 package
Additional information about this tool
#from os.path import splitext
    #import re

    ## clean_ext: repeatedly strip read-number / sequence-format / compression
    ## suffixes ('.1', '.2', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip',
    ## '.sam', '.bam') from the end of a data set identifier,
    ## e.g. "sample.1.fastq.gz" -> "sample"
    ##
    ## inputs
    ## - identifier the identifier to clean
    ## output
    ## - the cleaned identifier; it is emitted as the text of the function
    ##   (#slurp drops the trailing newline), so the body must not emit any
    ##   other text
    ##
    ## splitext() does not treat a leading dot as the start of an extension
    ## (".fq" -> (".fq", "")), so an identifier like ".fq" would satisfy the
    ## loop condition forever; the loop therefore stops as soon as splitext()
    ## no longer removes anything.
    #def clean_ext($identifier)
        #while $identifier.endswith(('.1', '.2', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam'))
            #set $stripped = splitext($identifier)[0]
            #if $stripped == $identifier
                #break
            #end if
            #set $identifier = $stripped
        #end while
$identifier#slurp
    #end def

    ## fastq_input_foo determine link command, access path, sample name, and
    ## input type for a single data set
    ##
    ## inputs
    ## - sample data set, or a collection element holding several data sets
    ## - read_direction key used to pick the data set out of sample if it is a
    ##   collection (ignored otherwise)
    ## - infix text inserted between the cleaned sample name and the extension
    ##   of the link name
    ## return (link_command, data_path, name, inputype)
    ## - link_command bash command (ending in &&) that links the data set
    ## - data_path file name of the link (below stacks_inputs/)
    ## - name element identifier of sample with known extensions removed
    ## - inputype input type as used in stacks ([gz]fast(a|q)), or "UNKNOWN"
    ##   if the data set has none of the supported types
    #def fastq_input_foo( $sample, $read_direction="", $infix="" )
        #set $name = $clean_ext($sample.element_identifier)
        #if $sample.is_collection:
            #set $cur_sample=$sample[$read_direction]
        #else:
            #set $cur_sample=$sample
        #end if

        #if $cur_sample.is_of_type('fastqsanger')
            #set $ext =  "fastq"
            #set $inputype = "fastq"
        #else if $cur_sample.is_of_type('fastqsanger.gz')
            #set $ext = "fastq.gz"
            #set $inputype = "gzfastq"
        #else if $cur_sample.is_of_type('fasta')
            #set $ext = "fasta"
            #set $inputype = "fasta"
        #else if $cur_sample.is_of_type('fasta.gz')
            #set $ext = "fasta.gz"
            #set $inputype = "gzfasta"
        #else
            ## unsupported type: $ext must still be defined, otherwise building
            ## $data_path below fails on an undefined variable instead of
            ## handing the "UNKNOWN" input type back to the caller
            #set $ext = "unknown"
            #set $inputype = "UNKNOWN"
        #end if
        #set $data_path = "stacks_inputs/"+$name+$infix+"."+$ext
        ## NOTE(review): $name comes from the element identifier and is placed
        ## inside single quotes; an identifier containing a single quote would
        ## break the quoting -- confirm whether identifiers are sanitized upstream
        #set $link_cmd = "ln -s '%s' '%s' &&" % ($cur_sample, $data_path)
        #return ($link_cmd, $data_path, $name, $inputype)
    #end def

    ## fastq_input_batch build the link command, the access path(s), and the
    ## input type of one sample for batch tools
    ##
    ## inputs
    ## - sample data set / pair
    ## - type "single" / "paired"
    ## return (link_command, fwd_path, rev_path, inputype)
    ## - link_command bash command(s) to link the data set(s)
    ## - fwd_path file name of the link to the forward data set
    ## - rev_path file name of the link to the reverse data set
    ##   (empty string if type=single)
    ## - inputype input type as used in stacks ([gz]fast(a|q))
    #def fastq_input_batch($sample, $type)
        ## single end data: one link, no reverse path
        #if $type == "single"
            #set ($single_link, $single_path, $sample_name, $inputype) = $fastq_input_foo($sample, "", "")
            #return ($single_link, $single_path, "", $inputype)
        #end if
        ## everything else is treated as paired: link both mates as .1 / .2
        #set ($link_fwd, $path_fwd, $sample_name, $inputype) = $fastq_input_foo($sample, "forward", ".1")
        #set ($link_rev, $path_rev, $sample_name, $inputype) = $fastq_input_foo($sample, "reverse", ".2")
        #set $both_links = $link_fwd + $link_rev
        #return ($both_links, $path_fwd, $path_rev, $inputype)
    #end def

    ## fastq_input_nonbatch determine link command and input type for
    ## non-batch tools, i.e. tools that get all samples in a single call.
    ## The tools expect different file name infixes for the two mates of a
    ## pair (of the form _R[12]_ or .[12]), hence the infix is passed as a
    ## pattern.
    ##
    ## inputs
    ## - samples list of data set / pair
    ## - type "single" / "paired"
    ## - infix_pattern pattern for the infix of the files (needs to contain %d which is replaced by 1/2);
    ##   only used if type is not "single"
    ## return (link_command, inputype)
    ## - link_command bash command(s) to link the data sets
    ## - inputype input type as used in stacks ([gz]fast(a|q)); this is the
    ##   type of the last data set that was processed, or "UNKNOWN" if
    ##   samples is empty
    #def fastq_input_nonbatch( $samples, $type, $infix_pattern )
        #set $link_command = ""
        ## defined before the loop so that the #return below does not fail on
        ## an undefined variable when $samples is empty
        #set $inputype = "UNKNOWN"
        #for $sample in $samples
            #if $type == "single"
                #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "", "")
                #set link_command += lc
            #else:
                #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "forward", $infix_pattern % (1))
                #set link_command += lc
                #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "reverse", $infix_pattern % (2))
                #set link_command += lc
            #end if
        #end for
        #return ($link_command, $inputype)
    #end def
    

## Run the bundled check_bcfile.py on the barcode file before anything else;
## only done when a barcode encoding was selected (presumably the script
## rejects malformed barcode files -- TODO confirm against check_bcfile.py).
#if $input_type.barcode_encoding != ''
    python '$__tool_directory__'/check_bcfile.py '$input_type.barcode' &&
#end if
## Working directories: links to the inputs and the process_radtags outputs
mkdir stacks_inputs stacks_outputs &&

## If a log output is requested, pre-create the log file name as a link to the
## output data set (presumably process_radtags names its log after the input
## directory and thereby writes straight into the data set -- TODO confirm).
#if $output_log
    ln -s '$output_log' stacks_outputs/process_radtags.stacks_inputs.log &&
#end if

## Emit the ln commands for all inputs; for paired data the link names get the
## infix _R1_0 / _R2_0 between sample name and extension. $inputype is used for
## the -i option of process_radtags.
#set ($link_command, $inputype) = $fastq_input_nonbatch( $input_type.fqinputs, $input_type.input_type_select, "_R%d_0" )
$link_command

## The process_radtags call itself. Parameters that are emitted bare (without
## an option name in front of them) presumably carry the complete command line
## flag, or an empty string, as their value -- TODO confirm against the
## parameter definitions of the tool.
process_radtags


    ## Input: the directory holding the links, the input type determined while
    ## linking, and the barcode encoding flag. The barcode file is only passed
    ## if a barcode encoding is set.
    -p stacks_inputs/
    #if $input_type.input_type_select == "paired"
        --paired
    #end if
    -i $inputype
    $input_type.barcode_encoding
    #if $input_type.barcode_encoding != ''
        -b '$input_type.barcode'
    #end if
    ## Output type is only passed if it is not "auto"
    #if str( $outype ) != "auto"
        -y $outype
    #end if
    -o stacks_outputs
    

    ## Filtering options: -w / -s and the filter flags if filtering is enabled,
    ## otherwise only the optional length limit
    #if $filter_cond.filter_select == 'yes':
        -w $filter_cond.sliding
        -s $filter_cond.score
        $filter_cond.remove
        $filter_cond.discard
        $filter_cond.filter_illumina
    #else
        #if str($filter_cond.len_limit) != "":
            --len_limit $filter_cond.len_limit
        #end if
    #end if
    ## $capture is also tested after the call to decide whether the
    ## *discards files need to be collected
    $capture
    

    ## Truncation length (only if set) and header retention
    #if str($options_advanced.truncate)
        -t $options_advanced.truncate
    #end if
    $options_advanced.retain_header
    

    ## Barcode rescue: the distances are only passed if rescue is enabled
    #if str($options_advanced.rescue_cond.rescue) != ""
        $options_advanced.rescue_cond.rescue
        #if str($options_advanced.rescue_cond.barcode_dist_1) != "":
            --barcode_dist_1 $options_advanced.rescue_cond.barcode_dist_1
        #end if
        #if str($options_advanced.rescue_cond.barcode_dist_2) != "":
            --barcode_dist_2 $options_advanced.rescue_cond.barcode_dist_2
        #end if
    #end if
    

    ## Adapter options
    #if str($options_advanced.adapter_1) != "":
        --adapter_1 $options_advanced.adapter_1
    #end if
    #if str($options_advanced.adapter_2) != "":
        --adapter_2 $options_advanced.adapter_2
    #end if
    #if str($options_advanced.adapter_mm) != "":
        --adapter_mm $options_advanced.adapter_mm
    #end if
    

## -E not implemented in Galaxy defaults to phred33

## Restriction enzyme options
## The second enzyme is only passed if the selector is "2" and a second enzyme
## has been chosen.
#if str($options_enzyme.enzyme) != '':
    -e $options_enzyme.enzyme
#end if
#if str( $options_enzyme.options_enzyme_selector ) == "2" and str($options_enzyme.enzyme2)!='':
    --renz_2 $options_enzyme.enzyme2
#end if

## advanced options not shared between shortreads and radtags
$options_advanced.bestrad
$options_advanced.disable_rad_check

## Output options
## --merge not implemented in Galaxy

    ## Post-processing of the process_radtags outputs (chained with && to the
    ## process_radtags call). The rename loops read the file names with
    ## "IFS= read -r" and quote every expansion of the file name so that names
    ## containing backslashes, repeated whitespace, or glob characters are
    ## passed on unchanged.
    #if $capture:
        ## collect the files with the discarded reads in a separate directory
        && mkdir stacks_outputs/discarded/
        && mv stacks_outputs/*discards stacks_outputs/discarded/

        ## fix the _R[12]_0 that was added for preparing the input
        #if $input_type.input_type_select == 'paired':
            && (find stacks_outputs/discarded/ -type f | while IFS= read -r file; do mv "\$file" "\$(echo "\$file" | sed 's/_R1_0/.1/; s/_R2_0/.2/;')"; done)
        #end if
        ## also remove the gz which is added by procrad (but its uncompressed)
        && (find stacks_outputs/discarded/ -type f -iname "*.gz.discards" | while IFS= read -r file; do mv "\$file" "\$(echo "\$file" | sed 's/.gz.discards$/.discards/;')"; done)

        ## the discard files are named fastq even if the output is fasta
        #if str($outype).endswith("fasta"):
            && (find stacks_outputs/discarded/ -type f | while IFS= read -r file; do mv "\$file" "\$(echo "\$file" | sed 's/\.fastq.discards/.fa/;')"; done)
        #else
            && (find stacks_outputs/discarded/ -type f | while IFS= read -r file; do mv "\$file" "\$(echo "\$file" | sed 's/\.fastq.discards/.fq/;')"; done)
        #end if
    #end if
    ## prepare paired read output for processing in galaxy
    ## (the *.rem.[12].* files are moved to a separate directory, then .1. / .2.
    ## in all sequence file names is replaced by .forward. / .reverse.)
    #if $input_type.input_type_select == 'paired':
        && mkdir stacks_outputs/remaining
        && (find stacks_outputs -iregex ".*\.rem\.[12]\.f[aq]\(\.gz\)?" | while IFS= read -r file; do mv "\$file" stacks_outputs/remaining/; done)
        && (find stacks_outputs/ -iregex ".*.f[aq]\(\.gz\)?" | while IFS= read -r file; do mv "\$file" "\$(echo "\$file" | sed 's/\.1\./.forward./; s/\.2\./.reverse./')"; done)
    #end if
    
    
None
False
Functional tests
name inputs outputs required files
Test-1 input_type|fqinputs: ['procrad/R1_01.fq', 'procrad/R1_02.fq', 'procrad/R1_03.fq', 'procrad/R1_04.fq']
input_type|barcode_encoding: --inline_null
input_type|barcode: procrad/barcodes
input_type|input_type_select: single
options_enzyme|enzyme: ecoRI
options_enzyme|options_enzyme_selector: 1
add_log: True
name: value
procrad/R1_01.fq
procrad/R1_02.fq
procrad/R1_03.fq
procrad/R1_04.fq
procrad/barcodes
value
Test-2 input_type|fqinputs: ['procrad/R1.fq.gzip', 'procrad/R2.fq.gzip']
input_type|barcode_encoding: --inline_null
input_type|barcode: procrad/barcodes
input_type|input_type_select: single
options_enzyme|enzyme: ecoRI
options_enzyme|options_enzyme_selector: 1
filter_cond|sliding: 0.1
filter_cond|score: 11
filter_cond|remove: True
filter_cond|discard: True
filter_cond|filter_select: yes
capture: True
outype: gzfastq
add_log: True
name: value
procrad/R1.fq.gzip
procrad/R2.fq.gzip
procrad/barcodes
value
Test-3 input_type|fqinputs: list:paired collection
input_type|barcode: procrad/barcodes
input_type|input_type_select: paired
options_enzyme|enzyme: ecoRI
options_enzyme|options_enzyme_selector: 1
filter_cond|len_limit: 50
filter_cond|filter_select: no
capture: True
outype: gzfasta
add_log: True
name: value
procrad/R1.fq.gzip
procrad/R2.fq.gzip
procrad/barcodes
value
Test-4 input_type|fqinputs: list:paired collection
input_type|barcode: procrad/barcodes
input_type|input_type_select: paired
options_enzyme|enzyme: ecoRI
options_enzyme|enzyme2: ecoRI
options_enzyme|options_enzyme_selector: 2
options_advanced|truncate: 70
options_advanced|retain_header: True
options_advanced|bestrad: True
options_advanced|disable_rad_check: True
options_advanced|rescue_cond|barcode_dist_1: 2
options_advanced|rescue_cond|barcode_dist_2: 2
options_advanced|rescue_cond|rescue: -r
options_advanced|adapter_1: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
options_advanced|adapter_2: TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
options_advanced|adapter_mm: 2
outype: fasta
procrad/R1.fq.gzip
procrad/R2.fq.gzip
procrad/barcodes
Test-5 input_type|fqinputs: ['procrad/R1_01.fq', 'procrad/R1_02.fq', 'procrad/R1_03.fq', 'procrad/R1_04.fq']
input_type|barcode_encoding: --inline_null
input_type|barcode: procrad/barcodes-duplicate
input_type|input_type_select: single
options_enzyme|enzyme: ecoRI
options_enzyme|options_enzyme_selector: 1
add_log: True
procrad/R1_01.fq
procrad/R1_02.fq
procrad/R1_03.fq
procrad/R1_04.fq
procrad/barcodes-duplicate
Test-6 input_type|fqinputs: ['procrad/R1_01.fq', 'procrad/R1_02.fq', 'procrad/R1_03.fq', 'procrad/R1_04.fq']
input_type|barcode_encoding: --inline_null
input_type|barcode: procrad/barcodes-duplicate2
input_type|input_type_select: single
options_enzyme|enzyme: ecoRI
options_enzyme|options_enzyme_selector: 1
add_log: True
procrad/R1_01.fq
procrad/R1_02.fq
procrad/R1_03.fq
procrad/R1_04.fq
procrad/barcodes-duplicate2
Test-7 input_type|fqinputs: procrad/R1.fq.gzip
input_type|barcode_encoding:
input_type|input_type_select: single
options_enzyme|enzyme: ecoRI
options_enzyme|options_enzyme_selector: 1
capture: True
add_log: True
name: value
procrad/R1.fq.gzip
value
Test-8 input_type|fqinputs: list:paired collection
input_type|barcode_encoding:
input_type|input_type_select: paired
options_enzyme|enzyme: ecoRI
options_enzyme|options_enzyme_selector: 1
outype: gzfasta
add_log: True
name: value
procrad/R1.fq.gzip
procrad/R2.fq.gzip
value