view srst2.xml @ 2:e59fdf6145db draft default tip

planemo upload commit 158a919b3605123af3b7c8d720a776fa62b9cba6
author nml
date Wed, 25 Oct 2017 12:03:56 -0400
parents 599a4dc309aa
children
line wrap: on
line source

<tool id="srst2" name="SRST2" version="0.3.7">
    <description>Short Read Sequence Typing for Bacterial Pathogens</description>
    <!-- Packages declared by this tool: srst2 0.1.4.6 and the vfdb package versioned 08-07-2014.
         NOTE(review): the command section reads VF_PATH from the environment, presumably
         exported by the vfdb package; confirm. -->
    <requirements>
        <requirement type="package" version="0.1.4.6">srst2</requirement>
        <requirement type="package" version="08-07-2014">vfdb</requirement>
    </requirements>
    <!-- Any exit code of 1 or higher is reported as a fatal error. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
    </stdio>
    <command><![CDATA[
        ## Command line for the srst2.pl wrapper shipped in the tool directory.
        ## Every parameter is referenced through its enclosing conditional
        ## (e.g. $mlst_or_genedb.mlst_db) instead of by bare name, so all
        ## references in this template follow the same form.
        $__tool_directory__/srst2.pl $bam_results $scores $pileup

        ## Job-type marker (m, g or b), the outputs that job type fills in and,
        ## for gene database jobs, the database name(s).
        #if $mlst_or_genedb.job_type == "mlst_only"
            m $txt_results $alleles
            #if $mlst_or_genedb.allele_choice.allele_report == "all"
                all
            #elif $mlst_or_genedb.allele_choice.allele_report == "new"
                new
            #end if
        #elif $mlst_or_genedb.job_type == "custom_only"
            g $genes_results $fullgenes_results
            ## To allow multiple custom databases, all database names are joined
            ## into one comma separated value that the perl script parses.
            ## NOTE(review): dataset names are user-controlled text placed inside
            ## double quotes on a shell command line; consider sanitising them.
            #set $dbs = ','.join([$database.gene_db.name for $database in ( $mlst_or_genedb.databases )])
            "$dbs"
        #elif $mlst_or_genedb.job_type == "vfdb_only"
            g $genes_results $fullgenes_results $mlst_or_genedb.vfdb_in.name
        #elif $mlst_or_genedb.job_type == "mlst_custom"
            b $txt_results $genes_results $fullgenes_results
            #set $dbs = ','.join([$database.gene_db.name for $database in ( $mlst_or_genedb.databases )])
            "$dbs"
        #elif $mlst_or_genedb.job_type == "mlst_vfdb"
            b $txt_results $genes_results $fullgenes_results $mlst_or_genedb.vfdb_in.name
        #end if

        ## Sample label followed by the read files.
        #if $single_or_paired.type == "single"
            "$single_or_paired.input_se.element_identifier"
            --input_se "$single_or_paired.input_se"
        #elif $single_or_paired.type == "paired"
            "$single_or_paired.forward_pe.name"
            --input_pe "$single_or_paired.forward_pe" "$single_or_paired.reverse_pe"
        #else
            "$single_or_paired.fastq_collection.forward.name"
            --input_pe "$single_or_paired.fastq_collection.forward" "$single_or_paired.fastq_collection.reverse"
        #end if

        ## Database options for the selected job type.
        #if $mlst_or_genedb.job_type == "mlst_only"
            --mlst_db $mlst_or_genedb.mlst_db
            --mlst_definition $mlst_or_genedb.mlst_defs
            --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
            --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
            --report_all_consensus
        #elif $mlst_or_genedb.job_type == "mlst_vfdb"
            --mlst_db $mlst_or_genedb.mlst_db
            --mlst_definition $mlst_or_genedb.mlst_defs
            --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
            --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
            --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
            --gene_db \$VF_PATH/${mlst_or_genedb.vfdb_in.fields.path}
        #elif $mlst_or_genedb.job_type == "mlst_custom"
            --gene_db
            #for $database in $mlst_or_genedb.databases
                $database.gene_db
            #end for
            --mlst_db $mlst_or_genedb.mlst_db
            --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
            --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
            --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
            --mlst_definition $mlst_or_genedb.mlst_defs
        #elif $mlst_or_genedb.job_type == "vfdb_only"
            --gene_db \$VF_PATH/${mlst_or_genedb.vfdb_in.fields.path}
            --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
        #elif $mlst_or_genedb.job_type == "custom_only"
            --gene_db
            #for $database in $mlst_or_genedb.databases
                $database.gene_db
            #end for
            --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
        #end if

        --read_type q

        --save_scores

        ## Advanced options are tested against the empty string rather than for
        ## truthiness, so that an explicit value of 0 is passed through instead
        ## of being silently dropped.
        #if $options.select == "advanced"
            #if str($options.min_coverage) != ''
                --min_coverage $options.min_coverage
            #end if
            #if str($options.max_divergence) != ''
                --max_divergence $options.max_divergence
            #end if
            #if str($options.min_depth) != ''
                --min_depth $options.min_depth
            #end if
            #if str($options.min_edge_depth) != ''
                --min_edge_depth $options.min_edge_depth
            #end if
            #if str($options.prob_err) != ''
                --prob_err $options.prob_err
            #end if
            #if str($options.stop_after) != ''
                --stop_after $options.stop_after
            #end if
            ## The quoted --other value stays open across the next block so the
            ## insert-size flags end up inside the same quoted string.
                --other "'-p \${GALAXY_SLOTS:-1}
            #if str($options.maxins) != ''
                --maxins $options.maxins
                --minins $options.minins
            #end if
                '"
            #if str($options.mapq) != ''
                --mapq $options.mapq
            #end if
            #if str($options.baseq) != ''
                --baseq $options.baseq
            #end if
        #else
            --other "'-p \${GALAXY_SLOTS:-1}'"
        #end if

        --output \${PWD}/out
    ]]></command>
    <inputs>
        <conditional name="single_or_paired">
            <!-- Layout of the input reads; each choice exposes its own dataset parameter(s). -->
            <param name="type" type="select" label="Read type">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
                <option value="collection">Collection Paired-end</option>
            </param>

            <!-- One fastqsanger dataset. -->
            <when value="single">
                <param name="input_se"
                       type="data"
                       format="fastqsanger"
                       label="Single end read file(s)"/>
            </when>

            <!-- Two separate fastqsanger datasets, forward and reverse. -->
            <when value="paired">
                <param name="forward_pe"
                       type="data"
                       format="fastqsanger"
                       label="Forward paired-end read file"/>
                <param name="reverse_pe"
                       type="data"
                       format="fastqsanger"
                       label="Reverse paired-end read file"/>
            </when>

            <!-- A single paired collection holding both mates. -->
            <when value="collection">
                <param name="fastq_collection"
                       type="data_collection"
                       collection_type="paired"
                       format="fastqsanger"
                       optional="false"
                       label="Paired-end reads collection"/>
            </when>
        </conditional>

        <!-- Job type selector. Each branch declares the databases and mismatch limits that job
             type needs. The VFDB strain select is declared identically in both VFDB branches
             (sorted, with a validator for an empty data table). -->
        <conditional name="mlst_or_genedb">
            <param name="job_type" type="select" label="Job type">
                <option value="mlst_only">MLST only</option>
                <option value="mlst_vfdb">MLST and VFDB</option>
                <option value="mlst_custom">MLST and custom database</option>
                <option value="vfdb_only">VFDB only</option>
                <option value="custom_only">Custom database only</option>
            </param>
            <when value="mlst_only">
                <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
                <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
                <param name="mlst_max_mismatch" type="integer" label="Maximum number of mismatches per read for MLST allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
                <!-- Both choices are empty branches; the command section only reads the selected value. -->
                <conditional name="allele_choice">
                    <param name="allele_report" type="select" label="Reported Alleles" >
                        <option value="all">All</option>
                        <option value="new">Only New</option>
                    </param>
                    <when value="all"/>
                    <when value="new"/>
                </conditional>
                <param name="mlst_delim" type="text" label="Character(s) separating gene name from allele number in MLST database" value="" help="Typically _ or -" optional="false" >
                    <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
                </param>
            </when>
            <when value="mlst_vfdb">
                <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
                <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
                <param name="vfdb_in" type="select" label="Choose a VFDB strain">
                    <options from_data_table="vfdb_fasta_files" >
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No strains are available" />
                    </options>
                </param>
                <param name="mlst_max_mismatch" type="integer" label="Maximum number of mismatches per read for MLST allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
                <param name="gene_max_mismatch" type="integer" label="Maximum number of mismatches per read for gene allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
                <param name="mlst_delim" type="text" label="Character(s) separating gene name from allele number in MLST database" value="" help="Typically _ or -" optional="false" >
                    <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
                </param>
            </when>
            <when value="mlst_custom">
                <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
                <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
                <!-- At least one custom gene database is required. -->
                <repeat name="databases" title="Databases" min="1">
                    <param name="gene_db" type="data" format="fasta" label="Fasta file for gene database" />
                </repeat>
                <param name="mlst_max_mismatch" type="integer" label="Maximum number of mismatches per read for MLST allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
                <param name="gene_max_mismatch" type="integer" label="Maximum number of mismatches per read for gene allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
                <param name="mlst_delim" type="text" label="Character(s) separating gene name from allele number in MLST database" value="" help="Typically _ or -" optional="false" >
                    <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
                </param>
            </when>
            <when value="vfdb_only">
                <param name="vfdb_in" type="select" label="Choose a VFDB strain">
                    <options from_data_table="vfdb_fasta_files" >
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No strains are available" />
                    </options>
                </param>
                <param name="gene_max_mismatch" type="integer" label="Maximum number of mismatches per read for gene allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
            </when>
            <when value="custom_only">
                <param name="gene_max_mismatch" type="integer" label="Maximum number of mismatches per read for gene allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
                <!-- At least one custom gene database is required. -->
                <repeat name="databases" title="Databases" min="1">
                    <param name="gene_db" type="data" format="fasta" label="Fasta file for gene database" />
                </repeat>
            </when>
        </conditional>
        <!-- Basic mode exposes no extra parameters; Advanced mode exposes the reporting
             thresholds and the samtools / Bowtie 2 settings declared below. -->
        <conditional name="options">
            <param name="select" type="select" label="Options Type">
                <option value="basic">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="advanced">
                <param name="min_coverage" type="integer" label="Minimum %coverage cutoff for gene reporting" value="90"/>
                <param name="max_divergence" type="integer" label="Maximum %divergence cutoff for gene reporting" value="10"/>
                <param name="min_depth" type="integer" label="Minimum mean depth to flag as dubious allele call" value="5"/>
                <param name="min_edge_depth" type="integer" label="Minimum edge depth to flag as dubious allele call" value="2"/>
                <param name="prob_err" type="float" label="Probability of sequencing error" value="0.01"/>
                <param name="stop_after" type="integer" label="Stop mapping after this number of reads have been mapped (otherwise map all)" optional="true"/>
                <param name="mapq" type="integer" label="Samtools -q parameter" value="1"/>
                <param name="baseq" type="integer" label="Samtools -Q parameter" value="20"/>
                <!-- The in_range validators only enforce a lower bound of 0; nothing here compares
                     the two insert sizes, so the messages describe the check actually performed. -->
                <param name="minins" type="integer" label="Bowtie 2 -I parameter. The minimum fragment length for valid paired-end alignments." value="0" >
                    <validator type="in_range" message="Must be 0 or greater (and should not exceed the -X parameter)." min="0"/>
                </param>
                <param name="maxins" type="integer" label="Bowtie 2 -X parameter. The maximum fragment length for valid paired-end alignments." value="1000" >
                    <validator type="in_range" message="Must be 0 or greater (and should not be below the -I parameter)." min="0"/>
                </param>
            </when>
            <when value="basic"/>
        </conditional>
    </inputs>

    <outputs>
        <!-- Declared without a filter, so present for every job type. -->
        <data name="bam_results" format="bam" label="Bam Results"/>
        <data name="scores" format="tabular" label="Scores"/>
        <data name="pileup" format="tabular" label="Pileup"/>

        <!-- Only for the MLST-only job type. -->
        <data name="alleles" format="fasta" label="Alleles">
            <filter>mlst_or_genedb['job_type'] == "mlst_only"</filter>
        </data>

        <!-- Skipped for the two job types that do not include MLST. -->
        <data name="txt_results" format="tabular" label="Text Results">
            <filter>mlst_or_genedb['job_type'] not in ("vfdb_only", "custom_only")</filter>
        </data>

        <!-- Gene reports: every job type except MLST-only. -->
        <data name="genes_results" format="tabular" label="Genes Results">
            <filter>mlst_or_genedb['job_type'] != "mlst_only"</filter>
        </data>
        <data name="fullgenes_results" format="tabular" label="Full Genes Results">
            <filter>mlst_or_genedb['job_type'] != "mlst_only"</filter>
        </data>
    </outputs>

    <tests>
        <!-- Single test: an MLST-only job on a paired collection. It checks the BAM, scores,
             alleles and text results outputs. allele_report and the options conditional are
             not set here. -->
        <test>
            <param name="type" value="collection" />
            <param name="fastq_collection">
                <collection type="paired">
                    <element name="forward" value="ERR028678_sampled_1.fastq" />
                    <element name="reverse" value="ERR028678_sampled_2.fastq" />
                </collection>
            </param>
            <param name="job_type" value="mlst_only" />
            <param name="mlst_defs" value="ecoli.txt" />
            <param name="mlst_db" value="Escherichia_coli#1.fasta" />
            <param name="mlst_max_mismatch" value="10" />
            <param name="mlst_delim" value="-" />
            <!-- Up to two differing lines are tolerated in the BAM comparison. -->
            <output name="bam_results" file="ERR028678_sampled.bam" ftype="bam" lines_diff="2" />
            <output name="scores" file="ERR028678_sampled_scores.tabular" />
            <!-- Don't test pileup as it is too large of a file > 90 MB -->
            <!-- <output name="pileup" file="ERR028678_sampled_pileup.tabular" />-->
            <output name="alleles" file="ERR028678_sampled_alleles.fasta" />
            <output name="txt_results" file="ERR028678_sampled_text_results.tabular" />
        </test>
    </tests>


    <help>
What it does
============

Short Read Sequence Typing for Bacterial Pathogens

This program is designed to take Illumina sequence data, a MLST database and/or a database of gene sequences (e.g. resistance genes, virulence genes, etc) and report the presence of STs and/or reference genes. The tool has a database of virulence factors that was extracted from http://www.mgc.ac.cn/VFs/ .

For more information about SRST2 and for instructions on how to format custom databases, visit https://github.com/katholt/srst2


Usage
=====

Basic Options
-------------

**Read Type**
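   - Single-end: Single end read file(s) for analysing (--input_se)
   - Paired-end: Paired end read file(s) for analysing (--input_pe)
   - Collection Paired-end: A paired collection of forward and reverse reads for analysing (--input_pe)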

**Job Type**
    - MLST only: Reports Sequence Types
    - MLST and VFDB: Reports Sequence Types and user can choose one of the built-in Virulence Factor Database (VFDB) strains
    - MLST and custom database: Reports Sequence Types and user can upload their own custom database
    - VFDB only: User can choose one of the built-in Virulence Factor Database (VFDB) strains
    - Custom database only: User can upload their own custom database

**ST definitions for MLST scheme:**
    - Required if you want to calculate STs (--mlst_definitions)

**Fasta file of MLST alleles:**
    - Required if you want to calculate STs (--mlst_db)

**Fasta file for gene database:**
    - Required if you want details of the sequences. The user must provide their own database (--gene_db)

**VFDB strain:**
    - Required if you want details of the sequences. The user may choose one of the listed strains (--gene_db)

**Read file type:**
    - fastq
    - solexa
    - fasta

**Character(s) separating gene name from allele number in MLST database:**
    - Required for all MLST job types
    - Typically either _ or -
    - The output from getMLST will identify the delimiter.

**Maximum number of mismatches per read for MLST allele calling:**
    - Required for all MLST job types
    - For MLST schemas with inserts this number should be set to a high value (recommended: 250)

**Maximum number of mismatches per read for gene allele calling:**
    - Required for all VFDB or custom database job types
    - For genes with inserts this number should be set to a high value (recommended: 250).

**Option Type:**
    - Basic: Includes only the options listed above
    - Advanced: Includes the options listed below

-------------------------------

Advanced Options
----------------

**Minimum %coverage cutoff for gene reporting:**
    - Default is 90 (--min_coverage)

**Maximum %divergence cutoff for gene reporting:**
    - Default is 10 (--max_divergence)

**Minimum mean depth to flag as dubious allele call:**
    - Default is 5 (--min_depth)

**Minimum edge depth to flag as dubious allele call:**
    - Default is 2 (--min_edge_depth)

**Probability of sequencing error:**
    - Default is 0.01 (--prob_err)

**Stop mapping after this number of reads have been mapped (otherwise map all):**
    - Default maps all (--stop_after)

**Other arguments to pass to bowtie2:**
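    - Not user-editable: the tool passes the thread count (-p) and, in Advanced mode, the -I/-X values described below (--other)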

**Samtools -q parameter:**
    - Default is 1 (--mapq)

**Samtools -Q parameter:**
    - Default is 20 (--baseq)

**Bowtie2 -I/--minins:**
    - The minimum fragment length for valid paired-end alignments. E.g. if -I 60 is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as -X is also satisfied). A 19-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -I constraint is applied with respect to the untrimmed mates.
    - The larger the difference between -I and -X, the slower Bowtie 2 will run. This is because larger differences between -I and -X require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient.
    - Default: 0 (essentially imposing no minimum)

**Bowtie2 -X/--maxins:**
    - The maximum fragment length for valid paired-end alignments. E.g. if -X 100 is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, not the trimmed mates.
    - The larger the difference between -I and -X, the slower Bowtie 2 will run. This is because larger differences between -I and -X require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient.
    - Bowtie 2 default: 500. The Advanced options form is pre-filled with 1000.

**Acknowledgments**
    Original Author: Mariam Iskander

    Jen Cabral

    Philip Mabon

    Mark Iskander

    Eric Enns

    </help>
    <citations>
      <!-- SRST2 publication (Inouye et al., Genome Medicine, 2014). -->
      <citation type="doi">10.1186/s13073-014-0090-6</citation>
      <!-- Citation already present in this wrapper, kept unchanged.
           NOTE(review): this DOI appears to be the ARG-ANNOT paper; confirm. -->
      <citation type="doi">10.1128/AAC.01310-13</citation>
    </citations>
</tool>