Mercurial > repos > iuc > vsearch
changeset 1:8c4e2933a17a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsearch commit 95732e013ec4dfe5dae0b9ed81e9d7710cbaed9d
| author | iuc | 
|---|---|
| date | Wed, 26 Aug 2015 13:34:22 -0400 | 
| parents | fae6527990af | 
| children | f29e21388219 | 
| files | clustering.xml dereplication.xml sorting.xml test-data/clustering_blast6out_result2.tab test-data/clustering_centroids_result2.fasta test-data/clustering_centroids_result4.fasta test-data/clustering_notmatched_result2.fasta test-data/clustering_uc_result3.uc test-data/dereplication_result2.fasta test-data/dereplication_uc_result3.fasta test-data/sorting_result3.fasta test-data/sorting_result4.fasta test-data/sorting_result5.fasta tool_dependencies.xml vsearch_macros.xml | 
| diffstat | 15 files changed, 34678 insertions(+), 38 deletions(-) [+] | 
line wrap: on
 line diff
--- a/clustering.xml Thu May 21 03:58:09 2015 -0400 +++ b/clustering.xml Wed Aug 26 13:34:22 2015 -0400 @@ -1,4 +1,4 @@ -<tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0"> +<tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.1"> <description></description> <macros> <import>vsearch_macros.xml</import> @@ -10,8 +10,12 @@ <![CDATA[ vsearch @GENERAL@ - --cluster_fast "$infile" - ##--cluster_smallmem FILENAME cluster sequences using a small amount of memory + + #if $clustering_mode.clustering_mode_select == 'cluster_fast': + --cluster_fast "$infile" + #else if $clustering_mode.clustering_mode_select == 'cluster_smallmem': + --cluster_smallmem "$infile" + #end if ##--clusters STRING output each cluster to a separate FASTA file #if $maxrejects: @@ -23,7 +27,7 @@ $cons_truncate --id $id - ##--iddef $iddef + --iddef $iddef #if '--msaout' in str($outputs): --msaout $msaout @@ -52,24 +56,33 @@ #if $qmask != 'no': --qmask $qmask #end if - #if $sizein: - --sizein $sizein - #end if - #if $sizeout: - --sizeout $sizeout - #end if + $sizein + $sizeout --strand $strand - --usersort $usersort + $usersort + #if $uc: + --uc "$uc_outfile" + #end if ]]> </command> <inputs> - <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" /> + <param name="infile" type="data" format="fasta" label="Select your input FASTA file" help="" /> + <conditional name="clustering_mode"> + <param name="clustering_mode_select" type="select" label="Choose sorting method to use before clustering" help=""> + <option value="cluster_fast" default="True">Cluster sequences after sorting by length (--cluster-fast)</option> + <option value="cluster_smallmem">Cluster already sorted sequences (--cluster-smallmem)</option> + </param> + <when value="cluster_fast"> + </when> + <when value="cluster_smallmem"> + </when> + </conditional> + <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False" 
+ label="Indicate that input sequences are not presorted by length" help="(--usersort)"/> <expand macro="id_and_iddef" /> <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False" label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/> - <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False" - label="Indicate that input sequences are presorted" help="(--usersort)"/> <expand macro="qmask" /> <expand macro="sizein" /> <expand macro="sizeout" /> @@ -83,6 +96,7 @@ <option value="--notmatched">Write non-matching query sequences to separate file</option> <option value="--matched">Write matching query sequences to separate file</option> </expand> + <expand macro="uclust_like_output" /> </inputs> <outputs> @@ -110,6 +124,9 @@ <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences"> <filter>'--fastapairs' in outputs</filter> </data> + <data name="uc_outfile" format="tabular" label="${tool.name} on ${on_string}: UCLUST like output"> + <filter>uc is True</filter> + </data> </outputs> <tests> <test> @@ -117,7 +134,7 @@ <param name="id" value="0.99"/> <param name="maxaccepts" value="1"/> <param name="maxrejects" value="2"/> - <param name="sizeout" value="--sizeout"/> + <param name="sizeout" value=""/> <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" /> <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" /> <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" /> @@ -128,6 +145,56 @@ <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /--> <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /--> </test> + <test> + <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> + <param name="clustering_mode_select" value="cluster_smallmem"/> + <param name="usersort" value="--usersort"/> + 
<param name="id" value="0.99"/> + <param name="maxaccepts" value="1"/> + <param name="maxrejects" value="2"/> + <param name="sizeout" value="--sizeout"/> + <param name="outputs" value="--centroids,--blast6out,--notmatched" /> + <output name="centroids" file="clustering_centroids_result2.fasta" ftype="fasta" /> + <output name="blast6out" file="clustering_blast6out_result2.tab" ftype="tabular" /> + <output name="notmatched" file="clustering_notmatched_result2.fasta" ftype="fasta" /> + </test> + <test> + <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> + <param name="clustering_mode_select" value="cluster_smallmem"/> + <param name="usersort" value="--usersort"/> + <param name="id" value="0.99"/> + <param name="maxaccepts" value="1"/> + <param name="maxrejects" value="2"/> + <param name="sizeout" value="--sizeout"/> + <param name="outputs" value="--centroids" /> + <param name="uc" value="--uc"/> + <output name="centroids" file="clustering_centroids_result2.fasta" ftype="fasta" /> + <output name="uc_outfile" file="clustering_uc_result3.uc" ftype="tabular" /> + </test> + <test> + <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> + <param name="clustering_mode_select" value="cluster_smallmem"/> + <param name="usersort" value="--usersort"/> + <param name="id" value="0.99"/> + <param name="maxaccepts" value="1"/> + <param name="maxrejects" value="2"/> + <param name="sizeout" value="--sizeout"/> + <param name="outputs" value="--centroids" /> + <param name="iddef" value="0"/> + <output name="centroids" file="clustering_centroids_result4.fasta" ftype="fasta" /> + </test> + <test> + <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> + <param name="clustering_mode_select" value="cluster_fast"/> + <param name="usersort" value="--usersort"/> + <param name="id" value="0.99"/> + <param name="maxaccepts" value="1"/> + <param name="maxrejects" value="2"/> + <param name="sizeout" value=""/> + <param name="outputs" value="--centroids" /> + 
<param name="qmask" value="none"/> + <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" /> + </test> </tests> <help> <![CDATA[ @@ -139,19 +206,21 @@ Clustering options (most searching options also apply) --centroids FILENAME output centroid sequences to FASTA file - --cluster_fast FILENAME cluster sequences fast - --cluster_smallmem FILENAME cluster sequences using a small amount of memory + --cluster_fast FILENAME cluster sequences after sorting by length + --cluster_size FILENAME cluster sequences after sorting by abundance + --cluster_smallmem FILENAME cluster already sorted sequences (see --usersort) --clusters STRING output each cluster to a separate FASTA file --consout FILENAME output cluster consensus sequences to FASTA file --cons_truncate do not ignore terminal gaps in MSA for consensus --id REAL reject if identity lower --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2) - --msaout FILENAME output multiple seq. alignments to FASTA file - --qmask mask seqs with dust, soft or no method (dust) - --sizein read abundance annotation from input + --msaout FILENAME output multiple seq. alignments to FASTA file + --qmask mask seqs with dust, soft or no method (dust) + --sizein propagate abundance annotation from input --sizeout write cluster abundances to centroid file - --strand cluster using "plus" or "both" strands (plus) - --usersort indicate that input sequences are presorted + --strand cluster using plus or both strands (plus) + --uc FILENAME filename for UCLUST-like output + --usersort indicate sequences not presorted by length @EXTERNAL_DOCUMENTATION@
--- a/dereplication.xml Thu May 21 03:58:09 2015 -0400 +++ b/dereplication.xml Wed Aug 26 13:34:22 2015 -0400 @@ -1,4 +1,4 @@ -<tool id="vsearch_dereplication" name="VSearch dereplication" version="@VERSION@.0"> +<tool id="vsearch_dereplication" name="VSearch dereplication" version="@VERSION@.1"> <description></description> <macros> <import>vsearch_macros.xml</import> @@ -18,16 +18,14 @@ --minuniquesize $minuniquesize #end if --output $outfile - #if $sizein: - --sizein $sizein + $sizein + $sizeout + --strand $strand + #if $topn: + --topn $topn #end if - #if $sizeout: - --sizeout $sizeout - #end if - --strand $strand - --topn $topn #if $uc: - --uc $uc + --uc $uc_outfile #end if ]]> </command> @@ -45,7 +43,7 @@ </inputs> <outputs> <data name="outfile" format="fasta" label="${tool.name} on ${on_string}" /> - <data name="uc" format="fasta" label="${tool.name} on ${on_string}: UCLUST like output"> + <data name="uc_outfile" format="fasta" label="${tool.name} on ${on_string}: UCLUST like output"> <filter>uc is True</filter> </data> </outputs> @@ -55,9 +53,28 @@ <param name="strand" value="both" /> <param name="minuniquesize" value="1" /> <param name="maxuniquesize" value="100000" /> + <param name="topn" value="10000" /> + <output name="outfile" file="dereplication_result1.fasta" ftype="fasta" /> + </test> + <test> + <param name="infile" value="AF091148_first_rep.fsa.bz2" ftype="fasta" /> + <param name="strand" value="both" /> + <param name="minuniquesize" value="1" /> + <param name="maxuniquesize" value="100000" /> + <param name="sizeout" value="--sizeout"/> + <param name="topn" value="" /> + <output name="outfile" file="dereplication_result2.fasta" ftype="fasta" /> + </test> + <test> + <param name="infile" value="AF091148_first_rep.fsa.bz2" ftype="fasta" /> + <param name="strand" value="both" /> + <param name="minuniquesize" value="1" /> + <param name="maxuniquesize" value="100000" /> <param name="sizeout" value="--sizeout"/> <param name="topn" value="10000" /> - <output 
name="outfile" file="dereplication_result1.fasta" ftype="fasta" /> + <param name="uc" value="--uc" /> + <output name="outfile" file="dereplication_result2.fasta" ftype="fasta" /> + <output name="uc_outfile" file="dereplication_uc_result3.fasta" ftype="fasta" /> </test> </tests> <help>
--- a/sorting.xml Thu May 21 03:58:09 2015 -0400 +++ b/sorting.xml Wed Aug 26 13:34:22 2015 -0400 @@ -1,4 +1,4 @@ -<tool id="vsearch_sorting" name="VSearch sorting" version="@VERSION@.0"> +<tool id="vsearch_sorting" name="VSearch sorting" version="@VERSION@.1"> <description></description> <macros> <import>vsearch_macros.xml</import> @@ -22,9 +22,10 @@ #end if #end if --output $outfile - #if $sizeout: - --sizeout $sizeout + #if $relabel: + --relabel "$relabel" #end if + $sizeout #if $topn: --topn $topn #end if @@ -49,6 +50,8 @@ </when> </conditional> <expand macro="topn" /> + <param name="relabel" type="text" value="" + label="Relabel with this prefix string after sorting" help="(--relabel)"/> <param name="sizeout" type="boolean" truevalue="--sizeout" falsevalue="" checked="False" label="Add abundance annotation to output" help="(--sizeout)"/> </inputs> @@ -66,6 +69,24 @@ <param name="infile" value="db.fasta" ftype="fasta" /> <output name="outfile" file="sorting_result2.fasta" ftype="fasta" /> </test> + <test> + <param name="sorting_mode_select" value="sortbylength"/> + <param name="infile" value="db.fasta" ftype="fasta" /> + <param name="relabel" value="TEST" /> + <output name="outfile" file="sorting_result3.fasta" ftype="fasta" /> + </test> + <test> + <param name="sorting_mode_select" value="sortbylength"/> + <param name="infile" value="db.fasta" ftype="fasta" /> + <param name="sizeout" value="--sizeout" /> + <output name="outfile" file="sorting_result4.fasta" ftype="fasta" /> + </test> + <test> + <param name="sorting_mode_select" value="sortbylength"/> + <param name="infile" value="db.fasta" ftype="fasta" /> + <param name="relabel" value="With spaces" /> + <output name="outfile" file="sorting_result5.fasta" ftype="fasta" /> + </test> </tests> <help> <