diff search.xml @ 0:fae6527990af draft

Imported from capsule None
author iuc
date Thu, 21 May 2015 03:58:09 -0400
parents
children f29e21388219
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/search.xml	Thu May 21 03:58:09 2015 -0400
@@ -0,0 +1,391 @@
+<tool id="vsearch_search" name="VSearch search" version="@VERSION@.0">
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+
+        @GENERAL@
+        --db $dbfile
+        --dbmask $dbmask
+
+        ##--fulldp                    full dynamic programming alignment (always on)
+        ##--gapext STRING             penalties for gap extension (2I/1E)
+        ##--gapopen STRING            penalties for gap opening (20I/2E)
+        $hardmask
+        --id $id
+        --iddef $iddef
+
+        #if $qmask != 'no':
+            --qmask $qmask
+        #end if
+
+        ## --rowlen INT                width of alignment lines in alnout output (64)
+
+        $self_param
+        $selfid_param
+        #if $sizeout:
+            --sizeout $sizeout
+        #end if
+        --strand $strand
+        --usearch_global $queryfile
+
+        #if '--alnout' in str($outputs):
+            --alnout $alnout
+        #end if
+        #if '--blast6out' in str($outputs):
+            --blast6out $blast6out
+        #end if
+        #if '--dbmatched' in str($outputs):
+            --dbmatched $dbmatched
+        #end if
+        #if '--dbnotmatched' in str($outputs):
+            --dbnotmatched $dbnotmatched
+        #end if
+        #if '--fastapairs' in str($outputs):
+            --fastapairs $fastapairs
+        #end if
+        #if '--notmatched' in str($outputs):
+            --notmatched $notmatched
+        #end if
+        #if '--matched' in str($outputs):
+            --matched $matched
+        #end if
+
+        #if $adv_opts.adv_opts_selector == "advanced":
+            $adv_opts.top_hits_only
+            $adv_opts.rightjust
+            $adv_opts.leftjust
+            --target_cov $adv_opts.target_cov
+            --query_cov $adv_opts.query_cov
+
+
+            #if $adv_opts.maxid:
+                --maxid $adv_opts.maxid
+            #end if
+            #if $adv_opts.maxqt:
+                --maxqt $adv_opts.maxqsize
+            #end if
+            #if $adv_opts.maxsizeratio:
+                --maxsizeratio $adv_opts.maxsizeratio
+            #end if
+            #if $adv_opts.maxsl:
+                --maxsl $adv_opts.maxsl
+            #end if
+            #if $adv_opts.mid:
+                --mid $adv_opts.mid
+            #end if
+            #if $adv_opts.minqt:
+                --minqt $adv_opts.minqt
+            #end if
+            #if $adv_opts.minsizeratio:
+                --minsizeratio $adv_opts.minseqlength
+            #end if
+            #if $adv_opts.minsl:
+                --minsl $adv_opts.minsl
+            #end if
+
+            #if $adv_opts.mintsize:
+                --mintsize $adv_opts.mintsize
+            #end if
+            #if $adv_opts.mismatch:
+                --mismatch $adv_opts.mismatch
+            #end if
+            #if $adv_opts.maxqsize:
+                --maxqsize $adv_opts.maxqsize
+            #end if
+            #if $adv_opts.mincols:
+                --mincols $adv_opts.mincols
+            #end if
+            #if $adv_opts.maxsubs:
+                --maxsubs $adv_opts.maxsubs
+            #end if
+            #if $adv_opts.maxrejects:
+                --maxrejects $adv_opts.maxrejects
+            #end if
+            #if $adv_opts.maxaccepts:
+                --maxaccepts $adv_opts.maxaccepts
+            #end if
+            #if $adv_opts.maxdiffs:
+                --maxdiffs $adv_opts.maxdiffs
+            #end if
+            #if $adv_opts.maxgaps:
+                --maxgaps $adv_opts.maxgaps
+            #end if
+            #if $adv_opts.maxhits:
+                --maxhits $adv_opts.maxhits
+            #end if
+            #if $adv_opts.match:
+                --match $adv_opts.match
+            #end if
+            #if $adv_opts.idprefix:
+                --idprefix $adv_opts.idprefix
+            #end if
+            #if $adv_opts.idsuffix:
+                --idsuffix $adv_opts.idsuffix
+            #end if
+
+            #if $adv_opts.uclust_output.uclust_output_select == 'yes':
+                --uc
+                $adv_opts.uclust_output.uc_allhits
+            #end if
+
+            #if $adv_opts.userfields_output.userfields_output_select == 'yes':
+                --userfields '#echo '+'.join( str($adv_opts.userfields_output.userfields).split(',') )#'
+                --userout $userout
+            #end if
+
+            ##--weak_id REAL              include aligned hits with >= id; continue search
+            --wordlength $adv_opts.wordlength
+
+            $$adv_opts.output_no_hits
+
+        #end if
+
+
+]]>
+    </command>
+    <inputs>
+        <param name="queryfile" type="data" format="fasta" label="Select your query FASTA file" help="(--usearch_global)" />
+        <param name="dbfile" type="data" format="fasta" label="Select your database FASTA file" help="(--db)" />
+
+        <expand macro="qmask" />
+        <expand macro="sizeout" />
+        <expand macro="strand" />
+        <expand macro="self_and_selfid" />
+        <expand macro="id_and_iddef" />
+        <expand macro="hardmask" />
+
+        <param name="dbmask" type="select" optional="True" label="Mask database sequences" help="(--dbmask)">
+            <option value="none" selected="True">No masking</option>
+            <option value="dust" selected="True">dust</option>
+            <option value="soft">soft</option>
+        </param>
+
+        <expand macro="general_output">
+            <option value="--dbmatched">FASTA file for matching database sequences</option>
+            <option value="--dbnotmatched">FASTA file for non-matching database sequences</option>
+            <option value="--notmatched">Write non-matching query sequences to separate file</option>
+            <option value="--matched">Write matching query sequences to separate file</option>
+        </expand>
+
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+
+                <param name="top_hits_only" type="boolean" truevalue="--top_hits_only" falsevalue="" checked="False" 
+                    label="Output only hits with identity equal to the best" help="(--top_hits_only)"/>
+                <param name="rightjust" type="boolean" truevalue="--rightjust" falsevalue="" checked="False" 
+                    label="Reject if terminal gaps at alignment right end" help="(--rightjust)"/>
+                <param name="leftjust" type="boolean" truevalue="--leftjust" falsevalue="" checked="False" 
+                    label="Reject if terminal gaps at alignment left end" help="(--leftjust)"/>
+
+                <param name="query_cov" type="float" value="" optional="True" label="Reject if fraction of query seq. aligned lower than this value"
+                    help="(--query_cov)"/>
+                <param name="target_cov" type="float" value="" optional="True" label="Reject if fraction of target seq. aligned lower than this value"
+                    help="(--target_cov)"/>
+                <param name="maxid" type="float" value="" optional="True" label="Reject if identity higher than this value"
+                    help="(--maxid)"/>
+                <param name="maxqt" type="float" value="" optional="True" label="Reject if query/target length ratio higher than this value"
+                    help="(--maxqt)"/>
+                <param name="maxsizeratio" type="float" value="" optional="True" label="Reject if query/target abundance ratio higher than this value"
+                    help="(--maxsizeratio)"/>
+                <param name="maxsl" type="float" value="" optional="True" label="Reject if shorter/longer length ratio higher than this value"
+                    help="(--maxsl)"/>
+                <param name="mid" type="float" value="" optional="True" label="Reject if percent identity lower than this value, ignoring gaps"
+                    help="(--mid)"/>
+                <param name="minqt" type="float" value="" optional="True" label="Reject if query/target length ratio lower than this value"
+                    help="(--minqt)"/>
+                <param name="minsl" type="float" value="" optional="True" label="Reject if shorter/longer length ratio lower than this value"
+                    help="(--minsl)"/>
+                <param name="minsizeratio" type="float" value="" optional="True" label="Reject if query/target abundance ratio lower than this value"
+                    help="(--minsizeratio)"/>
+
+                <param name="maxqsize" type="integer" value="" optional="True" label="Reject if query abundance larger than this value"
+                    help="(--maxqsize)"/>
+                <param name="mincols" type="integer" value="" optional="True" label="Reject if alignment length shorter than this value"
+                    help="(--mincols)"/>
+                <param name="maxsubs" type="integer" value="" optional="True" label="Reject if more substitutions than this value"
+                    help="(--maxsubs)"/>
+                <expand macro="maxrejects" />
+                <expand macro="maxaccepts" />
+                <param name="maxdiffs" type="integer" value="" optional="True" label="Reject if more substitutions or indels than this value"
+                    help="(--maxdiffs)"/>
+                <param name="maxgaps" type="integer" value="" optional="True" label="Reject if more indels than this value"
+                    help="(--maxgaps)"/>
+                <param name="maxhits" type="integer" value="" optional="True" label="Maximum number of hits to show"
+                    help="(--maxhits)"/>
+                <param name="match" type="integer" value="2" label="Score for match"
+                    help="(--match)"/>
+                <param name="idprefix" type="integer" value="" optional="True" label="Reject if first n nucleotides do not match"
+                    help="(--idprefix)"/>
+                <param name="idsuffix" type="integer" value="" optional="True" label="Reject if last n nucleotides do not match"
+                    help="(--idsuffix)"/>
+                <param name="mintsize" type="integer" value="" optional="True" label="Reject if target abundance lower"
+                    help="(--mintsize)"/>
+                <param name="mismatch" type="integer" value="-4" label="Score for mismatch"
+                    help="(--mismatch)"/>
+
+                <conditional name="uclust_output">
+                    <param name="uclust_output_select" type="select" label="UCLUST-like output" help="(--uc)">
+                        <option value="yes">Yes</option>
+                        <option value="no" selected="True">No</option>
+                    </param>
+                    <when value="yes">
+                        <param name="uc_allhits" type="boolean" truevalue="--uc_allhits" falsevalue="" checked="False" 
+                            label="Show all, not just top hits" help="(--uc_allhits)"/>
+                    </when>
+                    <when value="no" />
+                </conditional>
+
+                <param name="wordlength" type="integer" value="8" min="3" max="15" label="Length of words for database index 3-15"
+                    help="Allowed values: 3-15 (--wordlength)"/>
+                <param name="output_no_hits" type="boolean" truevalue="--output_no_hits" falsevalue="" checked="False" 
+                    label="Write non-matching queries to the output file" help="(--output_no_hits)"/>
+
+                <expand macro="userfields" />
+            </when>
+        </conditional>
+
+
+    </inputs>
+    <outputs>
+        <data name="uc" format="fasta" label="${tool.name} on ${on_string}: UCLUST like output">
+            <filter>adv_opts.uclust_output.uclust_output_select == 'yes'</filter>
+        </data>
+        <data name="dbnotmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched database sequences">
+            <filter>'--dbnotmatchedt' in outputs</filter>
+        </data>
+        <data name="dbmatched" format="fasta" label="${tool.name} on ${on_string}: Matched database sequences">
+            <filter>'--dbmatched' in outputs</filter>
+        </data>
+        <data name="alnout" format="fasta" label="${tool.name} on ${on_string}: Alignment">
+            <filter>'--alnout' in outputs</filter>
+        </data>
+        <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular">
+            <filter>'--blast6out' in outputs</filter>
+        </data>
+        <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries">
+            <filter>'--notmatched' in outputs</filter>
+        </data>
+        <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences">
+            <filter>'--matched' in outputs</filter>
+        </data>
+        <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences">
+            <filter>'--fastapairs' in outputs</filter>
+        </data>
+        <data name="userout" format="tabular" label="${tool.name} on ${on_string}: tabular output">
+            <filter>adv_opts.userfields_output.userfields_output_select == 'yes'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="dbfile" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="queryfile" value="query.fasta" ftype="fasta" />
+            <param name="outputs" value="--blast6out,--dbmatched" />
+            <param name="dbmask" value="none" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="top_hits_only" value="True" />
+            <output name="dbmatched" file="search_dbmatched_result1.fasta" ftype="fasta" />
+            <output name="blast6out" file="search_blast6out_result1.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="dbfile" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="queryfile" value="query.fasta" ftype="fasta" />
+            <param name="outputs" value="--fastapairs,--matched" />
+            <param name="dbmask" value="none" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="userfields_output_select" value="yes" />
+            <param name="userfields" value="query,target,evalue" />
+            <param name="top_hits_only" value="True" />
+            <param name="output_no_hits" value="True" />
+            <output name="matched" file="search_matched_result2.fasta" ftype="fasta" />
+            <output name="fastapairs" file="search_fastapairs_result2.fasta" ftype="fasta" />
+            <output name="userout" file="search_userfields_result2.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Sequence search based on vsearch.
+
+@USERFIELDS_HELP@
+
+Searching options
+  --alnout FILENAME           filename for human-readable alignment output
+  --blast6out FILENAME        filename for blast-like tab-separated output
+  --db FILENAME               filename for FASTA formatted database for search
+  --dbmask                    mask db with "dust", "soft" or "none" method (dust)
+  --dbmatched FILENAME        FASTA file for matching database sequences
+  --dbnotmatched FILENAME     FASTA file for non-matching database sequences
+  --fastapairs FILENAME       FASTA file with pairs of query and target
+  --fulldp                    full dynamic programming alignment (always on)
+  --gapext STRING             penalties for gap extension (2I/1E)
+  --gapopen STRING            penalties for gap opening (20I/2E)
+  --hardmask                  mask by replacing with N instead of lower case
+  --id REAL                   reject if identity lower
+  --iddef INT                 id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)
+  --idprefix INT              reject if first n nucleotides do not match
+  --idsuffix INT              reject if last n nucleotides do not match
+  --leftjust                  reject if terminal gaps at alignment left end
+  --match INT                 score for match (2)
+  --matched FILENAME          FASTA file for matching query sequences
+  --maxaccepts INT            number of hits to accept and show per strand (1)
+  --maxdiffs INT              reject if more substitutions or indels
+  --maxgaps INT               reject if more indels
+  --maxhits INT               maximum number of hits to show (unlimited)
+  --maxid REAL                reject if identity higher
+  --maxqsize INT              reject if query abundance larger
+  --maxqt REAL                reject if query/target length ratio higher
+  --maxrejects INT            number of non-matching hits to consider (32)
+  --maxsizeratio REAL         reject if query/target abundance ratio higher
+  --maxsl REAL                reject if shorter/longer length ratio higher
+  --maxsubs INT               reject if more substitutions
+  --mid REAL                  reject if percent identity lower, ignoring gaps
+  --mincols INT               reject if alignment length shorter
+  --minqt REAL                reject if query/target length ratio lower
+  --minsizeratio REAL         reject if query/target abundance ratio lower
+  --minsl REAL                reject if shorter/longer length ratio lower
+  --mintsize INT              reject if target abundance lower
+  --mismatch INT              score for mismatch (-4)
+  --notmatched FILENAME       FASTA file for non-matching query sequences
+  --output_no_hits            output non-matching queries to output files
+  --qmask                     mask query with "dust", "soft" or "none" method (dust)
+  --query_cov REAL            reject if fraction of query seq. aligned lower
+  --rightjust                 reject if terminal gaps at alignment right end
+  --rowlen INT                width of alignment lines in alnout output (64)
+  --self                      reject if labels identical
+  --selfid                    reject if sequences identical
+  --sizeout                   write abundance annotation to output
+  --strand                    search "plus" or "both" strands (plus)
+  --target_cov REAL           reject if fraction of target seq. aligned lower
+  --top_hits_only             output only hits with identity equal to the best
+  --uc FILENAME               filename for UCLUST-like output
+  --uc_allhits                show all, not just top hit with uc output
+  --usearch_global FILENAME   filename of queries for global alignment search
+  --userfields STRING         fields to output in userout file
+  --userout FILENAME          filename for user-defined tab-separated output
+  --weak_id REAL              include aligned hits with >= id; continue search
+  --wordlength INT            length of words for database index 3-15 (8)
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>