Mercurial > repos > iuc > vsearch
view clustering.xml @ 3:4258854759ba draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsearch commit 29f6e6424a37947adbe1eba92f0e7d3c83efc042-dirty
author | iuc |
---|---|
date | Tue, 01 Mar 2016 07:07:03 -0500 |
parents | f29e21388219 |
children | 576963db5f1b |
line wrap: on
line source
<!--
    Galaxy wrapper for the vsearch greedy clustering commands
    (cluster_fast / cluster_smallmem). Requirements, stdio handling, shared
    parameters and the @...@ tokens are imported from vsearch_macros.xml.
-->
<tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0">
    <description></description>
    <macros>
        <import>vsearch_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command>
<![CDATA[
        ## Flags ticked in the multi-select "outputs" parameter, computed once
        ## and reused by every optional-output test below.
        #set $selected_outputs = str($outputs).split(',')

        vsearch

            @GENERAL@

            ## Exactly one clustering mode flag is emitted, chosen by the conditional.
            #if $clustering_mode.clustering_mode_select == 'cluster_fast':
                --cluster_fast "$infile"
            #else if $clustering_mode.clustering_mode_select == 'cluster_smallmem':
                --cluster_smallmem "$infile"
            #end if

            ##--clusters STRING           output each cluster to a separate FASTA file

            ## Test for "a value was supplied" rather than truthiness, so that an
            ## explicit 0 is still forwarded to vsearch instead of being dropped.
            #if str($maxrejects).strip() not in ('', 'None'):
                --maxrejects "$maxrejects"
            #end if
            #if str($maxaccepts).strip() not in ('', 'None'):
                --maxaccepts "$maxaccepts"
            #end if

            $cons_truncate
            --id "$id"
            --iddef "$iddef"

            ## Optional outputs: each flag is only passed when it was selected.
            #if '--msaout' in $selected_outputs:
                --msaout "$msaout"
            #end if
            #if '--consout' in $selected_outputs:
                --consout "$consout"
            #end if
            #if '--centroids' in $selected_outputs:
                --centroids "$centroids"
            #end if
            #if '--alnout' in $selected_outputs:
                --alnout "$alnout"
            #end if
            #if '--blast6out' in $selected_outputs:
                --blast6out "$blast6out"
            #end if
            #if '--notmatched' in $selected_outputs:
                --notmatched "$notmatched"
            #end if
            #if '--fastapairs' in $selected_outputs:
                --fastapairs "$fastapairs"
            #end if
            #if '--matched' in $selected_outputs:
                --matched "$matched"
            #end if

            #if str( $qmask ) != 'no':
                --qmask "$qmask"
            #end if

            $sizein
            $sizeout
            --strand "$strand"
            $usersort

            #if $uc:
                --uc "$uc_outfile"
            #end if
]]>
    </command>
    <inputs>
        <param name="infile" type="data" format="fasta" label="Select your input FASTA file" help="" />
        <!-- The conditional carries no mode-specific parameters; it only selects the command flag. -->
        <conditional name="clustering_mode">
            <param name="clustering_mode_select" type="select" label="Choose sorting method to use before clustering" help="">
                <!-- "selected" is the attribute Galaxy recognises for marking the default option. -->
                <option value="cluster_fast" selected="true">Cluster sequences after sorting by length (--cluster_fast)</option>
                <option value="cluster_smallmem">Cluster already sorted sequences (--cluster_smallmem)</option>
            </param>
            <when value="cluster_fast">
            </when>
            <when value="cluster_smallmem">
            </when>
        </conditional>
        <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False"
            label="Indicate that input sequences are not presorted by length" help="(--usersort)"/>
        <expand macro="id_and_iddef" />
        <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False"
            label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/>
        <expand macro="qmask" />
        <expand macro="sizein" />
        <expand macro="sizeout" />
        <expand macro="strand" />
        <expand macro="maxrejects" />
        <expand macro="maxaccepts" />
        <!-- Clustering-specific choices appended to the shared "outputs" select. -->
        <expand macro="general_output">
            <option value="--msaout">Multiple sequence alignments</option>
            <option value="--consout">Cluster consensus sequences</option>
            <option value="--centroids">Centroid sequences</option>
            <option value="--notmatched">Write non-matching query sequences to separate file</option>
            <option value="--matched">Write matching query sequences to separate file</option>
        </expand>
        <expand macro="uclust_like_output" />
    </inputs>
    <!--
        Each optional dataset is created only when its flag was chosen in the
        "outputs" select. The leading "outputs and" guard keeps the expression
        from raising when nothing is selected (outputs is None); a filter that
        raises does not suppress its dataset, which would leave empty outputs.
    -->
    <outputs>
        <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments">
            <filter>outputs and '--msaout' in outputs</filter>
        </data>
        <data name="consout" format="fasta" label="${tool.name} on ${on_string}: Consensus Sequences">
            <filter>outputs and '--consout' in outputs</filter>
        </data>
        <data name="centroids" format="fasta" label="${tool.name} on ${on_string}: Cluster centroids">
            <filter>outputs and '--centroids' in outputs</filter>
        </data>
        <!-- NOTE(review): the alignment output is presumably plain text rather than FASTA;
             the declared format is left unchanged for compatibility. Confirm before changing. -->
        <data name="alnout" format="fasta" label="${tool.name} on ${on_string}: Alignment">
            <filter>outputs and '--alnout' in outputs</filter>
        </data>
        <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries">
            <filter>outputs and '--notmatched' in outputs</filter>
        </data>
        <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences">
            <filter>outputs and '--matched' in outputs</filter>
        </data>
        <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular">
            <filter>outputs and '--blast6out' in outputs</filter>
        </data>
        <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences">
            <filter>outputs and '--fastapairs' in outputs</filter>
        </data>
        <data name="uc_outfile" format="tabular" label="${tool.name} on ${on_string}: UCLUST like output">
            <filter>uc is True</filter>
        </data>
    </outputs>
    <tests>
        <!-- cluster_smallmem with three optional outputs selected. -->
        <test>
            <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
            <param name="clustering_mode_select" value="cluster_smallmem"/>
            <param name="usersort" value="--usersort"/>
            <param name="id" value="0.99"/>
            <param name="maxaccepts" value="1"/>
            <param name="maxrejects" value="2"/>
            <param name="sizeout" value="--sizeout"/>
            <param name="outputs" value="--centroids,--blast6out,--notmatched" />
            <output name="centroids" file="clustering_centroids_result2.fasta" ftype="fasta" />
            <output name="blast6out" file="clustering_blast6out_result2.tab" ftype="tabular" />
            <output name="notmatched" file="clustering_notmatched_result2.fasta" ftype="fasta" />
        </test>
        <!-- cluster_smallmem with the UCLUST-like output enabled. -->
        <test>
            <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
            <param name="clustering_mode_select" value="cluster_smallmem"/>
            <param name="usersort" value="--usersort"/>
            <param name="id" value="0.99"/>
            <param name="maxaccepts" value="1"/>
            <param name="maxrejects" value="2"/>
            <param name="sizeout" value="--sizeout"/>
            <param name="outputs" value="--centroids" />
            <param name="uc" value="--uc"/>
            <output name="centroids" file="clustering_centroids_result2.fasta" ftype="fasta" />
            <output name="uc_outfile" file="clustering_uc_result3.uc" ftype="tabular" />
        </test>
        <!-- cluster_smallmem with identity definition 0. -->
        <test>
            <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
            <param name="clustering_mode_select" value="cluster_smallmem"/>
            <param name="usersort" value="--usersort"/>
            <param name="id" value="0.99"/>
            <param name="maxaccepts" value="1"/>
            <param name="maxrejects" value="2"/>
            <param name="sizeout" value="--sizeout"/>
            <param name="outputs" value="--centroids" />
            <param name="iddef" value="0"/>
            <output name="centroids" file="clustering_centroids_result4.fasta" ftype="fasta" />
        </test>
        <!-- cluster_fast without sizeout and with qmask set to "none". -->
        <test>
            <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
            <param name="clustering_mode_select" value="cluster_fast"/>
            <param name="usersort" value="--usersort"/>
            <param name="id" value="0.99"/>
            <param name="maxaccepts" value="1"/>
            <param name="maxrejects" value="2"/>
            <param name="sizeout" value=""/>
            <param name="outputs" value="--centroids" />
            <param name="qmask" value="none"/>
            <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

vsearch implements a single-pass, greedy star-clustering algorithm, similar to
the algorithms implemented in usearch, DNAclust and sumaclust for example.

Clustering options (most searching options also apply)

  --centroids FILENAME        output centroid sequences to FASTA file
  --cluster_fast FILENAME     cluster sequences after sorting by length
  --cluster_size FILENAME     cluster sequences after sorting by abundance
  --cluster_smallmem FILENAME  cluster already sorted sequences (see --usersort)
  --clusters STRING           output each cluster to a separate FASTA file
  --consout FILENAME          output cluster consensus sequences to FASTA file
  --cons_truncate             do not ignore terminal gaps in MSA for consensus
  --id REAL                   reject if identity lower
  --iddef INT                 id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)
  --msaout FILENAME           output multiple seq. alignments to FASTA file
  --qmask                     seqs with dust, soft or no method (dust)
  --sizein                    propagate abundance annotation from input
  --sizeout                   write cluster abundances to centroid file
  --strand                    cluster using plus or both strands (plus)
  --uc FILENAME               filename for UCLUST-like output
  --usersort                  indicate sequences not presorted by length

@EXTERNAL_DOCUMENTATION@
]]>
    </help>
    <expand macro="citations" />
</tool>