changeset 0:ab2cf9c6c4f1 draft

Imported from capsule None
author dcorreia
date Tue, 15 Mar 2016 06:07:50 -0400
parents
children 3b7dde5c3ec4
files noisy.xml test-data/aln.fasta test-data/aln_out.fas tool_dependencies.xml
diffstat 4 files changed, 218 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/noisy.xml	Tue Mar 15 06:07:50 2016 -0400
@@ -0,0 +1,167 @@
+<tool id="noisy" name="Noisy" version="1.5.12.1">
+    <description>Cleaning aligned sequences</description>
+    <requirements> <!-- same package name and version as declared in tool_dependencies.xml -->
+        <requirement type="package" version="1.5.12">noisy</requirement>
+    </requirements>
+    <command><![CDATA[
+                noisy '$input'
+                ## Sequence type: either the selected code or, for 'cfg', a code derived from the info dataset.
+                #if $sequence.seqtype == 'cfg':
+                    ## Automatic sequence detection:
+                    ## read the info file and pick the option from the word it contains.
+                    #set $info = open( str($sequence.input_info) ).read()
+                    #if 'dna' in $info:
+                        --seqtype D
+                    #else if 'protein' in $info:
+                        --seqtype P
+                    #end if
+                    ## No --seqtype is passed when the info file mentions neither word.
+                #else:
+                    --seqtype $sequence.seqtype
+                #end if
+
+                --cutoff $cutoff
+                --distance $distance
+                #if $usematrix.matrix == "true":
+                    --matrix '$usematrix.matrixfile'
+                #end if
+                #if $missingchar.setmissingchar == "true":
+                    --missing '$missingchar.chars'
+                #end if
+                --ordering
+                #if $orderingmethode.ordering == "rand":
+                    rand,$orderingmethode.randpermut
+                #else if $orderingmethode.ordering == "list":
+                    '$orderingmethode.randpermutlist'
+                #else:
+                    $orderingmethode.ordering
+                #end if
+                ## Each boolean below expands to its flag only when the box is unchecked.
+                $constant
+                $gap
+
+                --shuffles $shuffles
+                --smooth $smooth
+                --silent
+
+                ]]>
+    </command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Source file" help="Fasta format" />
+        <conditional name="sequence"> <!-- sequence type: a fixed code, or "cfg" to read it from an info dataset -->
+            <param name="seqtype" type="select" label="Sequence Coding">
+                <option value="D">DNA</option>
+                <option value="P">Protein</option>
+                <option value="R">RNA</option>
+                <option value="cfg">Config file</option>
+            </param>
+            <when value="D"></when>
+            <when value="P"></when>
+            <when value="R"></when>
+            <when value="cfg"> <!-- must equal the "cfg" option value, which the command template tests for -->
+                <param name="input_info" type="data" format="txt" label="info" help="Precompute file containning sequence description (dna or protein)" />
+            </when>
+        </conditional>
+        <param name="cutoff" type="float" value="0.8" min="0" max="1" label="cut-off  [ 0-1 ]" help="Columns with a score below FLOAT are removed from the output alignment." />
+        <param name="distance" type="select" label="Distance methode used by NeighbotNet">
+            <option value="HAMMING">HAMMING</option>
+            <option value="GTR">GTR</option>
+        </param>
+        <conditional name="usematrix"> <!-- optional user-supplied distance matrix -->
+            <param name="matrix" type="boolean"  truevalue="true" falsevalue="" checked="False" label="Use matrix file" help="default is calculate distances with nnet"/>
+            <when value=""/>
+            <when value="true">
+                <param name="matrixfile" format="txt" type="data" label="matrix file"/>
+            </when>
+        </conditional>
+        <conditional name="missingchar"> <!-- optional list of characters treated as missing -->
+            <param name="setmissingchar" type="boolean" truevalue="true" falsevalue="" checked="False" label="Set list of missing chars" />
+            <when value=""/>
+            <when value="true">
+                <param name="chars" type="text" label="Missing chars list"/>
+            </when>
+        </conditional>
+        <conditional name="orderingmethode"> <!-- one "when" branch per option value of the select below -->
+            <param name="ordering" type="select" display="radio" label="Choose ordering method">
+                <option value="nnet">NeighborNet</option>
+                <option value="qnet">QNet</option>
+                <option value="rand">Sample INT random permutation</option>
+                <option value="list">List of index MSA ordering</option>
+                <option value="all">All permutations</option>
+            </param>
+            <when value="nnet"/>
+            <when value="qnet"/>
+            <when value="all"/>
+            <when value="rand">
+                <param name="randpermut" type="integer" value="1000" label="random permutation"/>
+            </when>
+            <when value="list">
+                <param name="randpermutlist" type="text" label="Comma-seperated string of INT"/>
+            </when>
+        </conditional>
+        <param name="constant" type="boolean" truevalue="" falsevalue="--noconstant" checked="True" label="Constant columns in output alignment" /> <!-- the flag is emitted only when unchecked -->
+        <param name="gap" type="boolean" truevalue="" falsevalue="--nogap" checked="True" label="Count gap symbol as character state"/> <!-- the flag is emitted only when unchecked -->
+        <param name="smooth" type="integer" value="1" min="0" max="1000" label="Running average over INT columns" help="Calculate a running average over the reliability score of INT columns and use this smoothed values to remove unreliable columns from the MAS.s" />
+        <param name="shuffles" type="integer" value="0" min="0" max="1000" label="Perform INT random shuffles per column of the MSA"/>
+    </inputs>
+	<outputs> <!-- NOTE(review): the from_work_dir values below contain a "*" wildcard; confirm Galaxy expands it, otherwise give noisy a fixed input name and list exact file names here -->
+		<data name="output1" format="fasta" label="Noisy Cleaned sequencies"  from_work_dir="*_out.fas" />
+		<data name="output2" format="eps" label="Noisy Cleaned sequencies image"  from_work_dir="*_typ.eps" />
+		<data name="output3" format="txt" label="Noisy Cleaned sequencies information"  from_work_dir="*_sta.gr" />
+	</outputs>
+    <tests> <!-- only "input" is set; every other parameter keeps its declared default -->
+        <test>
+            <param name="input" value="aln.fasta"/>
+            <output name="output1" file="aln_out.fas"/>
+        </test>
+    </tests>
+	
+<help>
+<![CDATA[ 
+Usage::
+
+ noisy [OPTIONS] MultiFastaAlignment
+
+Options::
+
+ --cutoff FLOAT     Set cutoff value to FLOAT. Columns with a score below FLOAT are removed from the output alignment.
+                     (default is >0.80<)
+ --distance STRING  Distance method used by NeighborNet.
+                     (default is >HAMMING< [HAMMING|GTR])
+ -h, --help         Print usage message for noisy.
+ --matrix FILE      Use distance matrix for NeighborNet from FILE.
+                     (default is >calculate distances with nnet<)
+ --missing STRING   Set list of missing characters to STRING.
+                     (default is >N<)
+ --nogap            Don't count gap symbol as character state.
+                     (default is >0<)
+ --noconstant       Ignore constant columns in output alignment.
+                     (default is >0<)
+ --ordering STRING
+                    Choose ordering method.
+                     (default is >nnet<)
+                    nnet         NeighborNet
+                    qnet         QNet algorithm is O(n^4) both in time and memory, be careful when you use this option.
+                    rand[,INT]   sample INT random permutation. With rand a random sample of all possible orderings of the TAXA can be specified for which the reliability score is calculated
+                                (default is >1000<)
+                    INT,INT,...  provide ordering explicitly as comma-separated list of TAXA indices in the range (e.g. 3,0,4,1,2 as ordering for the 5 TAXA in the input MSA).
+                    all          all permutations. If all is used, then the reliability score is calculated for all possible permutations of the TAXA (note that for more than 8 TAXA this can become rather time consuming!).
+
+ --shuffles INT     Perform INT random shuffles per column.
+                     (default is >0<)
+ -s, --silent       Turn off status bar
+                     (default is >0<)
+ --smooth INT       Running average over INT columns.
+                     (default is >1<)
+ --seqtype CHAR     Set sequence type of input alignment to CHAR
+                     (default is >D<) [D=DNA, P=Protein, R=RNA]
+ -v, --verbose      Verbose mode. Causes noisy to print debugging
+                    messages about its progress.
+]]>
+</help>
+    <citations> <!-- noisy itself, then the QNet and NeighborNet ordering methods -->
+        <citation type="doi">10.1186/1748-7188-3-7</citation>
+        <citation type="doi">10.1093/molbev/msl180</citation>
+        <citation type="doi">10.1093/molbev/msh018</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aln.fasta	Tue Mar 15 06:07:50 2016 -0400
@@ -0,0 +1,35 @@
+>TestSequence1
+ERNECFLKHKDDDPNLPPVVKPEPEALCTAFQENNNKFLENYLYEVARRHPYFYGPELLY
+YVKQ--YKAILTECCQAACCQAADKATCLAPKAKVLKEKLLASSAKQRHKCASIQKFGER
+AFKAWSIARLSQRFPKADFMDLSKLVTDLSKIHKECCHGDLLECADDREDLAKYV---QD
+SFSSKLKECCDKPLLEKSHCISELENDDLPNDLPSITTDFVEDKDVCKLLNYKEAKDVFL
+GTFLYEYSRRHPEYAVSLLLRIAKGYEATLERCCATDDAHACYSKVFDELQPLVDEPQ--
+KLMKRNCELFENLGAYGFQNALIIRYTKKMPQVSTPTLLVISKELANMGNKCCTLPESKR
+>TestSequence2
+ERADCFASHRDDNPGFPLMVRPPVDELCASYQADAQMFAGKYLYEVARRYPYFYAPELLY
+YAQKLLYKDALAEC-----CSAADKAACLTPKIDDLKESVMTSGAKQRFKCAGIEKFGER
+AFKAWAVARLSQKFPNADFAEISKIVTDLTKINKECCHGDLLECADDRVELGKYMCDNKD
+SISSKLGKCCEKPLLEKGHCIAELERDDMPADLSPIEADFVEDKEVCK--NYAEAKDVFL
+GTFLYELSRRHPEYSVVMLLRLAKGYEAVLEKCCATGDPPACYAKVFDELKPLIDEPQ--
+NLVKHNCELYGNLQEYGFQNALLIRYTKKMPQ---PTLVEVSRNLGKVGTKCCSLAEGER
+>TestSequence3
+ERNECFLSHKDDSPDLPKL-KPDPNTLCDEFKADEKKFWGKYLYEIARRHPYFYAPELLY
+YANK--YNGVFQEC-----CQAEDKGACLLPKIETMREKVLTSSARQRLRCASIQKFGER
+ALKAWSVARLSQKFPKAEFVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQD
+TISSKLKECCDKPLLEKSHCIAEVEKDAVPENLPPLTADFAEDKDVCK--NYQEAKDAFL
+GSFLYEYSRRHPE-YVLLRLALLKEYEATLEECCAKDDPHACYSTVFDKLKHLVDEPQ--
+NLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVSRSLGKVGTRCCTKPESER
+>TestSequence4
+ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
+FAKR--YKAAFTEC-----CQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGER
+AFKAWAVARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQD
+SISSKLKECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCK--NYAEAKDVFL
+GMFLYEYARRHPRESVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQLL
+NLIKQNCELFEQLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKR
+>TestSequence5
+ERNECFLQHKDDNPGFGQLVTPEADAMCTAFHENEQRFLGKYLYEIARRHPYFYAPELLY
+YAEE--YKGVFTEC-----CEAADKAACLTPKVDALREKVLASSAKERLKCASLQKFGER
+AFKAWSVARLSQKFPKAEFAEISKLVTDLAKIHKECCHGDLLECADDRADLAKYICENQD
+SISTKLKECCGKPVLEKSHCISEVERDELPADLPPLAVDFVEDKEVCK--NYQEAKDVFL
+GTFLYEYSRRHPS-SVSLLLRLAKEYEATLEKCCATDDPPACYAHVFDEFKPLVEEPH--
+NLVKTNCELFEKLGEYGFQNALLVRYTKKVPQVSTPTLVEVSRSLGKVGSKCCTHPEAER
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aln_out.fas	Tue Mar 15 06:07:50 2016 -0400
@@ -0,0 +1,10 @@
+>TestSequence1
+ERNCFHDDPPCNFNYLYEARRPYFYPELLYKECCADKCLPKSARCAKFGERAKAWARLSQFPKAFKVTDLKHECCHGDLLECADDRLKY-DSKLCCKPLEKHCIEEDPDLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPKLKCLYFQALRYTKPQPTLSLNGCCER
+>TestSequence5
+ERNCFHDDPPCNFKYLYEARRPYFYPELLYKECCADKCLPKSARCAKFGERAKAWARLSQFPKAFKVTDLKHECCHGDLLECADDRLKYNDSKLCCKPLEKHCIEEDPDLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPNLKCLYFQALRYTKPQPTLSLKGCCER
+>TestSequence2
+ERACFHDDPPCDFKYLYEARRPYFYPELLYKECCADKCLPKSARCAKFGERAKAWARLSQFPNAFKVTDLKNECCHGDLLECADDRLKYNDSKLCCKPLEKHCIEEDPDLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPNLKCLYFQALRYTKPQPTLSLKGCCER
+>TestSequence3
+ERNCFHDDPPCDFKYLYEARRPYFYPELLYNECCADKCLPKSARCAKFGERAKAWARLSQFPKAFKVTDLKHECCHGDLLECADDRLKYNDSKLCCKPLEKHCIEEDPNLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPNLKCLYFQALRYTKPQPTLSLKGCCER
+>TestSequence4
+ERNCFHDDPPCNFKYLYEARRPYFYPELLYKECCADKCLPKSARCAKFGERAKAWARLSQFPKAFKVTDLKHECCHGDLLECADDRLKYNDSKLCCKPLEKHCIEEDPDLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPNLKCLYFQALRYTKPQPTLSLKGCCER
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Mar 15 06:07:50 2016 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="noisy" version="1.5.12"> <!-- name and version match the requirement in noisy.xml -->
+        <repository changeset_revision="289d9da141b8" name="package_noisy_1_5_12" owner="dcorreia" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>