Mercurial > repos > iuc > vsearch

<tool id="vsearch_search" name="VSearch search" version="@VERSION@.0">
    <description></description>
    <macros>
        <import>vsearch_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command>
<![CDATA[
    ## Build the vsearch global-search command line from the tool parameters.
    ## Optional values are emitted only when they are set, so vsearch falls
    ## back to its own defaults for everything the user left empty.
    vsearch

        @GENERAL@
        --db "$dbfile"
        #if $dbmask:
            --dbmask "$dbmask"
        #end if

        ##--fulldp                    full dynamic programming alignment (always on)
        ##--gapext STRING             penalties for gap extension (2I/1E)
        ##--gapopen STRING            penalties for gap opening (20I/2E)
        $hardmask
        --id "$id"
        --iddef "$iddef"

        #if str( $qmask ) != 'no':
            --qmask "$qmask"
        #end if

        ## --rowlen INT                width of alignment lines in alnout output (64)

        $self_param
        $selfid_param
        $sizeout
        --strand "$strand"
        --usearch_global "$queryfile"

        ## The output selector is a comma separated list of vsearch flags.
        ## Split it once and write one file per requested flag.
        #set $requested_outputs = str( $outputs ).split( "," )
        #if '--alnout' in $requested_outputs:
            --alnout "$alnout"
        #end if
        #if '--blast6out' in $requested_outputs:
            --blast6out "$blast6out"
        #end if
        #if '--dbmatched' in $requested_outputs:
            --dbmatched "$dbmatched"
        #end if
        #if '--dbnotmatched' in $requested_outputs:
            --dbnotmatched "$dbnotmatched"
        #end if
        #if '--fastapairs' in $requested_outputs:
            --fastapairs "$fastapairs"
        #end if
        #if '--notmatched' in $requested_outputs:
            --notmatched "$notmatched"
        #end if
        #if '--matched' in $requested_outputs:
            --matched "$matched"
        #end if

        #if $adv_opts.adv_opts_selector == "advanced":
            ## Boolean switches render either their flag or an empty string.
            $adv_opts.top_hits_only
            $adv_opts.rightjust
            $adv_opts.leftjust

            ## Coverage thresholds are optional; skip them when left empty
            ## instead of passing an empty string to vsearch.
            #if str( $adv_opts.target_cov ):
                --target_cov "$adv_opts.target_cov"
            #end if
            #if str( $adv_opts.query_cov ):
                --query_cov "$adv_opts.query_cov"
            #end if

            ## Every option below must be filled from the parameter of the same name.
            #if str( $adv_opts.maxid ):
                --maxid "$adv_opts.maxid"
            #end if
            #if str( $adv_opts.maxqt ):
                --maxqt "$adv_opts.maxqt"
            #end if
            #if str( $adv_opts.maxsizeratio ):
                --maxsizeratio "$adv_opts.maxsizeratio"
            #end if
            #if str( $adv_opts.maxsl ):
                --maxsl "$adv_opts.maxsl"
            #end if
            #if str( $adv_opts.mid ):
                --mid "$adv_opts.mid"
            #end if
            #if str( $adv_opts.minqt ):
                --minqt "$adv_opts.minqt"
            #end if
            #if str( $adv_opts.minsizeratio ):
                --minsizeratio "$adv_opts.minsizeratio"
            #end if
            #if str( $adv_opts.minsl ):
                --minsl "$adv_opts.minsl"
            #end if

            #if str( $adv_opts.mintsize ):
                --mintsize "$adv_opts.mintsize"
            #end if
            #if str( $adv_opts.mismatch ):
                --mismatch "$adv_opts.mismatch"
            #end if
            #if str( $adv_opts.maxqsize ):
                --maxqsize "$adv_opts.maxqsize"
            #end if
            #if str( $adv_opts.mincols ):
                --mincols "$adv_opts.mincols"
            #end if
            #if str( $adv_opts.maxsubs ):
                --maxsubs "$adv_opts.maxsubs"
            #end if
            #if str( $adv_opts.maxrejects ):
                --maxrejects "$adv_opts.maxrejects"
            #end if
            #if str( $adv_opts.maxaccepts ):
                --maxaccepts "$adv_opts.maxaccepts"
            #end if
            #if str( $adv_opts.maxdiffs ):
                --maxdiffs "$adv_opts.maxdiffs"
            #end if
            #if str( $adv_opts.maxgaps ):
                --maxgaps "$adv_opts.maxgaps"
            #end if
            #if str( $adv_opts.maxhits ):
                --maxhits "$adv_opts.maxhits"
            #end if
            #if str( $adv_opts.match ):
                --match "$adv_opts.match"
            #end if
            #if str( $adv_opts.idprefix ):
                --idprefix "$adv_opts.idprefix"
            #end if
            #if str( $adv_opts.idsuffix ):
                --idsuffix "$adv_opts.idsuffix"
            #end if

            #if $adv_opts.uclust_output.uclust_output_select == 'yes':
                --uc "$uc"
                $adv_opts.uclust_output.uc_allhits
            #end if

            #if $adv_opts.userfields_output.userfields_output_select == 'yes':
                ## vsearch expects the selected fields joined with a plus sign.
                --userfields '#echo '+'.join( str($adv_opts.userfields_output.userfields).split(',') )#'
                --userout "$userout"
            #end if

            ##--weak_id REAL              include aligned hits with >= id; continue search
            --wordlength "$adv_opts.wordlength"

            $adv_opts.output_no_hits

        #end if


]]>
    </command>
    <inputs>
        <!-- Primary datasets: the query sequences and the database they are searched against. -->
        <param name="queryfile" type="data" format="fasta"
            label="Select your query FASTA file"
            help="(--usearch_global)" />
        <param name="dbfile" type="data" format="fasta"
            label="Select your database FASTA file"
            help="(--db)" />

        <!-- Shared parameter definitions imported from vsearch_macros.xml. -->
        <expand macro="qmask"/>
        <expand macro="sizeout"/>
        <expand macro="strand"/>
        <expand macro="self_and_selfid"/>
        <expand macro="id_and_iddef"/>
        <expand macro="hardmask"/>

        <!-- Masking method applied to the database sequences. A single-choice select may
             pre-select only one option; "dust" is kept because the help text of this tool
             lists it as the vsearch default for dbmask. -->
        <param name="dbmask" type="select" optional="True" label="Mask database sequences" help="(--dbmask)">
            <option value="none">No masking</option>
            <option value="dust" selected="True">dust</option>
            <option value="soft">soft</option>
        </param>

        <!-- Output selector: passes four search-specific choices into the general_output macro.
             Each option value is the literal vsearch flag that the command template and the
             output filters look for in the "outputs" selection.
             NOTE(review): the macro itself lives in vsearch_macros.xml; presumably it defines
             the "outputs" select and the remaining choices (alnout, blast6out, fastapairs). -->
        <expand macro="general_output">
            <option value="--dbmatched">FASTA file for matching database sequences</option>
            <option value="--dbnotmatched">FASTA file for non-matching database sequences</option>
            <option value="--notmatched">Write non-matching query sequences to separate file</option>
            <option value="--matched">Write matching query sequences to separate file</option>
        </expand>

        <!-- Advanced settings. They are read by the command template only when the
             selector is set to "advanced". -->
        <conditional name="adv_opts">
            <param name="adv_opts_selector" type="select" label="Advanced Options">
                <option value="basic" selected="True">Hide Advanced Options</option>
                <option value="advanced">Show Advanced Options</option>
            </param>
            <when value="basic"/>
            <when value="advanced">

                <!-- Switches: rendered as their flag when checked, as nothing otherwise. -->
                <param name="top_hits_only" type="boolean" checked="False"
                    truevalue="--top_hits_only" falsevalue=""
                    label="Output only hits with identity equal to the best" help="(--top_hits_only)"/>
                <param name="rightjust" type="boolean" checked="False"
                    truevalue="--rightjust" falsevalue=""
                    label="Reject if terminal gaps at alignment right end" help="(--rightjust)"/>
                <param name="leftjust" type="boolean" checked="False"
                    truevalue="--leftjust" falsevalue=""
                    label="Reject if terminal gaps at alignment left end" help="(--leftjust)"/>

                <!-- Optional real-valued rejection thresholds; empty by default. -->
                <param name="query_cov" type="float" value="" optional="True"
                    label="Reject if fraction of query seq. aligned lower than this value" help="(--query_cov)"/>
                <param name="target_cov" type="float" value="" optional="True"
                    label="Reject if fraction of target seq. aligned lower than this value" help="(--target_cov)"/>
                <param name="maxid" type="float" value="" optional="True"
                    label="Reject if identity higher than this value" help="(--maxid)"/>
                <param name="maxqt" type="float" value="" optional="True"
                    label="Reject if query/target length ratio higher than this value" help="(--maxqt)"/>
                <param name="maxsizeratio" type="float" value="" optional="True"
                    label="Reject if query/target abundance ratio higher than this value" help="(--maxsizeratio)"/>
                <param name="maxsl" type="float" value="" optional="True"
                    label="Reject if shorter/longer length ratio higher than this value" help="(--maxsl)"/>
                <param name="mid" type="float" value="" optional="True"
                    label="Reject if percent identity lower than this value, ignoring gaps" help="(--mid)"/>
                <param name="minqt" type="float" value="" optional="True"
                    label="Reject if query/target length ratio lower than this value" help="(--minqt)"/>
                <param name="minsl" type="float" value="" optional="True"
                    label="Reject if shorter/longer length ratio lower than this value" help="(--minsl)"/>
                <param name="minsizeratio" type="float" value="" optional="True"
                    label="Reject if query/target abundance ratio lower than this value" help="(--minsizeratio)"/>

                <!-- Integer limits and alignment scores. -->
                <param name="maxqsize" type="integer" value="" optional="True"
                    label="Reject if query abundance larger than this value" help="(--maxqsize)"/>
                <param name="mincols" type="integer" value="" optional="True"
                    label="Reject if alignment length shorter than this value" help="(--mincols)"/>
                <param name="maxsubs" type="integer" value="" optional="True"
                    label="Reject if more substitutions than this value" help="(--maxsubs)"/>
                <expand macro="maxrejects"/>
                <expand macro="maxaccepts"/>
                <param name="maxdiffs" type="integer" value="" optional="True"
                    label="Reject if more substitutions or indels than this value" help="(--maxdiffs)"/>
                <param name="maxgaps" type="integer" value="" optional="True"
                    label="Reject if more indels than this value" help="(--maxgaps)"/>
                <param name="maxhits" type="integer" value="" optional="True"
                    label="Maximum number of hits to show" help="(--maxhits)"/>
                <param name="match" type="integer" value="2"
                    label="Score for match" help="(--match)"/>
                <param name="idprefix" type="integer" value="" optional="True"
                    label="Reject if first n nucleotides do not match" help="(--idprefix)"/>
                <param name="idsuffix" type="integer" value="" optional="True"
                    label="Reject if last n nucleotides do not match" help="(--idsuffix)"/>
                <param name="mintsize" type="integer" value="" optional="True"
                    label="Reject if target abundance lower" help="(--mintsize)"/>
                <param name="mismatch" type="integer" value="-4"
                    label="Score for mismatch" help="(--mismatch)"/>

                <!-- Optional UCLUST-like report; choosing "yes" also enables the "uc" output dataset. -->
                <conditional name="uclust_output">
                    <param name="uclust_output_select" type="select" label="UCLUST-like output" help="(--uc)">
                        <option value="yes">Yes</option>
                        <option value="no" selected="True">No</option>
                    </param>
                    <when value="yes">
                        <param name="uc_allhits" type="boolean" checked="False"
                            truevalue="--uc_allhits" falsevalue=""
                            label="Show all, not just top hits" help="(--uc_allhits)"/>
                    </when>
                    <when value="no"/>
                </conditional>

                <!-- Index word length is always passed to vsearch in advanced mode. -->
                <param name="wordlength" type="integer" value="8" min="3" max="15"
                    label="Length of words for database index 3-15" help="Allowed values: 3-15 (--wordlength)"/>
                <param name="output_no_hits" type="boolean" checked="False"
                    truevalue="--output_no_hits" falsevalue=""
                    label="Write non-matching queries to the output file" help="(--output_no_hits)"/>

                <expand macro="userfields"/>
            </when>
        </conditional>


    </inputs>
    <outputs>
        <!-- Every dataset is conditional: its filter must agree with the condition under
             which the command template writes the corresponding file. -->

        <!-- UCLUST-like report is tab-separated, hence tabular. -->
        <data name="uc" format="tabular" label="${tool.name} on ${on_string}: UCLUST like output">
            <filter>adv_opts['adv_opts_selector'] == "advanced" and adv_opts['uclust_output']['uclust_output_select'] == 'yes'</filter>
        </data>
        <!-- The filter string has to match the option value of the output selector exactly. -->
        <data name="dbnotmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched database sequences">
            <filter>'--dbnotmatched' in outputs</filter>
        </data>
        <data name="dbmatched" format="fasta" label="${tool.name} on ${on_string}: Matched database sequences">
            <filter>'--dbmatched' in outputs</filter>
        </data>
        <!-- Human-readable alignment text, not FASTA. -->
        <data name="alnout" format="txt" label="${tool.name} on ${on_string}: Alignment">
            <filter>'--alnout' in outputs</filter>
        </data>
        <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular">
            <filter>'--blast6out' in outputs</filter>
        </data>
        <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries">
            <filter>'--notmatched' in outputs</filter>
        </data>
        <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences">
            <filter>'--matched' in outputs</filter>
        </data>
        <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences">
            <filter>'--fastapairs' in outputs</filter>
        </data>
        <!-- User-defined tabular report, available only with the advanced userfields option. -->
        <data name="userout" format="tabular" label="${tool.name} on ${on_string}: tabular output">
            <filter>adv_opts['adv_opts_selector'] == "advanced" and adv_opts['userfields_output']['userfields_output_select'] == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: BLAST-like table plus matched database sequences, top hits only. -->
        <test>
            <param name="queryfile" value="query.fasta" ftype="fasta"/>
            <param name="dbfile" value="three_human_mRNA.fasta" ftype="fasta"/>
            <param name="dbmask" value="none"/>
            <param name="outputs" value="--blast6out,--dbmatched"/>
            <param name="adv_opts_selector" value="advanced"/>
            <param name="top_hits_only" value="True"/>
            <output name="dbmatched" ftype="fasta" file="search_dbmatched_result1.fasta"/>
            <output name="blast6out" ftype="tabular" file="search_blast6out_result1.tabular"/>
        </test>
        <!-- Test 2: query/target pairs, matched queries and a user-defined field table,
             with non-matching queries written to the output as well. -->
        <test>
            <param name="queryfile" value="query.fasta" ftype="fasta"/>
            <param name="dbfile" value="three_human_mRNA.fasta" ftype="fasta"/>
            <param name="dbmask" value="none"/>
            <param name="outputs" value="--fastapairs,--matched"/>
            <param name="adv_opts_selector" value="advanced"/>
            <param name="top_hits_only" value="True"/>
            <param name="output_no_hits" value="True"/>
            <param name="userfields_output_select" value="yes"/>
            <param name="userfields" value="query,target,evalue"/>
            <output name="matched" ftype="fasta" file="search_matched_result2.fasta"/>
            <output name="fastapairs" ftype="fasta" file="search_fastapairs_result2.fasta"/>
            <output name="userout" ftype="tabular" file="search_userfields_result2.tabular"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

Sequence search based on vsearch.

@USERFIELDS_HELP@

Searching options
  --alnout FILENAME           filename for human-readable alignment output
  --blast6out FILENAME        filename for blast-like tab-separated output
  --db FILENAME               filename for FASTA formatted database for search
  --dbmask                    mask db with "dust", "soft" or "none" method (dust)
  --dbmatched FILENAME        FASTA file for matching database sequences
  --dbnotmatched FILENAME     FASTA file for non-matching database sequences
  --fastapairs FILENAME       FASTA file with pairs of query and target
  --fulldp                    full dynamic programming alignment (always on)
  --gapext STRING             penalties for gap extension (2I/1E)
  --gapopen STRING            penalties for gap opening (20I/2E)
  --hardmask                  mask by replacing with N instead of lower case
  --id REAL                   reject if identity lower
  --iddef INT                 id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)
  --idprefix INT              reject if first n nucleotides do not match
  --idsuffix INT              reject if last n nucleotides do not match
  --leftjust                  reject if terminal gaps at alignment left end
  --match INT                 score for match (2)
  --matched FILENAME          FASTA file for matching query sequences
  --maxaccepts INT            number of hits to accept and show per strand (1)
  --maxdiffs INT              reject if more substitutions or indels
  --maxgaps INT               reject if more indels
  --maxhits INT               maximum number of hits to show (unlimited)
  --maxid REAL                reject if identity higher
  --maxqsize INT              reject if query abundance larger
  --maxqt REAL                reject if query/target length ratio higher
  --maxrejects INT            number of non-matching hits to consider (32)
  --maxsizeratio REAL         reject if query/target abundance ratio higher
  --maxsl REAL                reject if shorter/longer length ratio higher
  --maxsubs INT               reject if more substitutions
  --mid REAL                  reject if percent identity lower, ignoring gaps
  --mincols INT               reject if alignment length shorter
  --minqt REAL                reject if query/target length ratio lower
  --minsizeratio REAL         reject if query/target abundance ratio lower
  --minsl REAL                reject if shorter/longer length ratio lower
  --mintsize INT              reject if target abundance lower
  --mismatch INT              score for mismatch (-4)
  --notmatched FILENAME       FASTA file for non-matching query sequences
  --output_no_hits            output non-matching queries to output files
  --qmask                     mask query with "dust", "soft" or "none" method (dust)
  --query_cov REAL            reject if fraction of query seq. aligned lower
  --rightjust                 reject if terminal gaps at alignment right end
  --rowlen INT                width of alignment lines in alnout output (64)
  --self                      reject if labels identical
  --selfid                    reject if sequences identical
  --sizeout                   write abundance annotation to output
  --strand                    search "plus" or "both" strands (plus)
  --target_cov REAL           reject if fraction of target seq. aligned lower
  --top_hits_only             output only hits with identity equal to the best
  --uc FILENAME               filename for UCLUST-like output
  --uc_allhits                show all, not just top hit with uc output
  --usearch_global FILENAME   filename of queries for global alignment search
  --userfields STRING         fields to output in userout file
  --userout FILENAME          filename for user-defined tab-separated output
  --weak_id REAL              include aligned hits with >= id; continue search
  --wordlength INT            length of words for database index 3-15 (8)

@EXTERNAL_DOCUMENTATION@


]]>
    </help>
    <expand macro="citations" />
</tool>
author	iuc
date	Fri, 07 Sep 2018 11:31:31 -0400
parents	4258854759ba
children	9495df9dd6ef