Mercurial > repos > dcorreia > noisy
changeset 0:ab2cf9c6c4f1 draft
Imported from capsule None
author | dcorreia |
---|---|
date | Tue, 15 Mar 2016 06:07:50 -0400 |
parents | |
children | 3b7dde5c3ec4 |
files | noisy.xml test-data/aln.fasta test-data/aln_out.fas tool_dependencies.xml |
diffstat | 4 files changed, 218 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/noisy.xml Tue Mar 15 06:07:50 2016 -0400
@@ -0,0 +1,167 @@
+<tool id="noisy" name="Noisy" version="1.5.12.1">
+    <description>Cleaning aligned sequences</description>
+    <requirements>
+        <requirement type="package" version="1.5.12">noisy</requirement>
+    </requirements>
+    <command><![CDATA[
+        noisy '$input'
+        ## Sequence type: taken from the select box, or derived from the info dataset when 'cfg' is chosen
+        #if $sequence.seqtype == 'cfg' :
+            ## Automatic sequence detection
+            ## read an info file to choose which option set
+            #set $info = open( str($sequence.input_info) ).read()
+            #if 'dna' in $info:
+                --seqtype D
+            #else if 'protein' in $info :
+                --seqtype P
+            #end if
+
+        #else :
+            --seqtype $sequence.seqtype
+        #end if
+        ## Scoring options
+        --cutoff $cutoff
+        --distance $distance
+        #if $usematrix.matrix == "true":
+            --matrix '$usematrix.matrixfile'
+        #end if
+        #if $missingchar.setmissingchar == "true":
+            --missing '$missingchar.chars'
+        #end if
+        --ordering
+        #if $orderingmethode.ordering == "rand":
+            rand,$orderingmethode.randpermut
+        #else if $orderingmethode.ordering == "list":
+            '$orderingmethode.randpermutlist'
+        #else:
+            $orderingmethode.ordering
+        #end if
+        ## The two boolean parameters expand to --noconstant / --nogap when unchecked and to nothing when checked
+        $constant
+        $gap
+
+        --shuffles $shuffles
+        --smooth $smooth
+        --silent
+
+    ]]>
+    </command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Source file" help="Fasta format" />
+        <conditional name="sequence">
+            <param name="seqtype" type="select" label="Sequence Coding">
+                <option value="D">DNA</option>
+                <option value="P">Protein</option>
+                <option value="R">RNA</option>
+                <option value="cfg">Config file</option>
+            </param>
+            <when value="D"></when>
+            <when value="P"></when>
+            <when value="R"></when>
+            <when value="cfg"> <!-- must match the "cfg" option value tested in the command template -->
+                <param name="input_info" type="data" format="txt" label="info" help="Precomputed file containing sequence description (dna or protein)" />
+            </when>
+        </conditional>
+        <param name="cutoff" type="float" value="0.8" min="0" max="1" label="cut-off [ 0-1 ]" help="Columns with a score below FLOAT are removed from the output alignment." />
+        <param name="distance" type="select" label="Distance method used by NeighborNet">
+            <option value="HAMMING">HAMMING</option>
+            <option value="GTR">GTR</option>
+        </param>
+        <conditional name="usematrix">
+            <param name="matrix" type="boolean" truevalue="true" falsevalue="" checked="False" label="Use matrix file" help="default is calculate distances with nnet"/>
+            <when value=""/>
+            <when value="true">
+                <param name="matrixfile" format="txt" type="data" label="matrix file"/>
+            </when>
+        </conditional>
+        <conditional name="missingchar">
+            <param name="setmissingchar" type="boolean" truevalue="true" falsevalue="" checked="False" label="Set list of missing chars" />
+            <when value=""/>
+            <when value="true">
+                <param name="chars" type="text" label="Missing chars list"/>
+            </when>
+        </conditional>
+        <conditional name="orderingmethode">
+            <param name="ordering" type="select" display="radio" label="Choose ordering method">
+                <option value="nnet">NeighborNet</option>
+                <option value="qnet">QNet</option>
+                <option value="rand">Sample INT random permutation</option>
+                <option value="list">List of index MSA ordering</option>
+                <option value="all">All permutations</option>
+            </param>
+            <when value="nnet"/>
+            <when value="qnet"/>
+            <when value="all"/> <!-- no extra parameters; the option value itself is passed to the ordering flag -->
+            <when value="rand">
+                <param name="randpermut" type="integer" value="1000" label="random permutation"/>
+            </when>
+            <when value="list">
+                <param name="randpermutlist" type="text" label="Comma-separated string of INT"/>
+            </when>
+        </conditional>
+        <param name="constant" type="boolean" truevalue="" falsevalue="--noconstant" checked="True" label="Constant columns in output alignment" />
+        <param name="gap" type="boolean" truevalue="" falsevalue="--nogap" checked="True" label="Count gap symbol as character state"/>
+        <param name="smooth" type="integer" value="1" min="0" max="1000" label="Running average over INT columns" help="Calculate a running average over the reliability score of INT columns and use these smoothed values to remove unreliable columns from the MSA." />
+        <param name="shuffles" type="integer" value="0" min="0" max="1000" label="Perform INT random shuffles per column of the MSA"/>
+    </inputs>
+    <outputs> <!-- NOTE(review): from_work_dir values below contain wildcards; confirm Galaxy resolves them, otherwise run noisy on a fixed input name -->
+        <data name="output1" format="fasta" label="Noisy Cleaned sequences" from_work_dir="*_out.fas" />
+        <data name="output2" format="eps" label="Noisy Cleaned sequences image" from_work_dir="*_typ.eps" />
+        <data name="output3" format="txt" label="Noisy Cleaned sequences information" from_work_dir="*_sta.gr" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="aln.fasta" />
+            <output name="output1" file="aln_out.fas" />
+        </test>
+    </tests>
+
+<help>
+<![CDATA[
+Usage::
+
+    noisy [OPTIONS] MultiFastaAlignment
+
+Options::
+
+    --cutoff FLOAT      Set cutoff value to FLOAT. Columns with a score below FLOAT are removed from the output alignment.
+                        (default is >0.80<)
+    --distance STRING   Distance method used by NeighborNet.
+                        (default is >HAMMING< [HAMMING|GTR])
+    -h, --help          Print usage message for noisy.
+    --matrix FILE       Use distance matrix for NeighborNet from FILE.
+                        (default is >calculate distances with nnet<)
+    --missing STRING    Set list of missing characters to STRING.
+                        (default is >N<)
+    --nogap             Don't count gap symbol as character state.
+                        (default is >0<)
+    --noconstant        Ignore constant columns in output alignment.
+                        (default is >0<)
+    --ordering STRING
+                        Choose ordering method.
+                        (default is >nnet<)
+                        nnet         NeighborNet
+                        qnet         QNet algorithm is O(n^4) both in time and memory, be careful when you use this option.
+                        rand[,INT]   sample INT random permutation. With rand a random sample of all possible orderings of the TAXA can be specified for which the reliability score is calculated
+                                     (default is >1000<)
+                        INT,INT,...  provide ordering explicitly as comma-separated list of TAXA indices in the range (e.g. 3,0,4,1,2 as ordering for the 5 TAXA in the input MSA).
+                        all          all permutations, If all is used then for all possible permutations of the TAXA the reliability score is calculated (Note that for more than 8 TAXA this can become rather time consuming!).
+
+    --shuffles INT      Perform INT random shuffles per column.
+                        (default is >0<)
+    -s, --silent        Turn off status bar
+                        (default is >0<)
+    --smooth INT        Running average over INT columns.
+                        (default is >1<)
+    --seqtype CHAR      Set sequence type of input alignment to CHAR
+                        (default is >D<) [D=DNA, P=Protein, R=RNA]
+    -v, --verbose       Verbose mode. Causes noisy to print debugging
+                        messages about its progress.
+]]>
+</help>
+    <citations>
+        <citation type="doi">10.1186/1748-7188-3-7</citation>
+        <citation type="doi">10.1093/molbev/msl180</citation>
+        <citation type="doi">10.1093/molbev/msh018</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/aln.fasta Tue Mar 15 06:07:50 2016 -0400 @@ -0,0 +1,35 @@ +>TestSequence1 +ERNECFLKHKDDDPNLPPVVKPEPEALCTAFQENNNKFLENYLYEVARRHPYFYGPELLY +YVKQ--YKAILTECCQAACCQAADKATCLAPKAKVLKEKLLASSAKQRHKCASIQKFGER +AFKAWSIARLSQRFPKADFMDLSKLVTDLSKIHKECCHGDLLECADDREDLAKYV---QD +SFSSKLKECCDKPLLEKSHCISELENDDLPNDLPSITTDFVEDKDVCKLLNYKEAKDVFL +GTFLYEYSRRHPEYAVSLLLRIAKGYEATLERCCATDDAHACYSKVFDELQPLVDEPQ-- +KLMKRNCELFENLGAYGFQNALIIRYTKKMPQVSTPTLLVISKELANMGNKCCTLPESKR +>TestSequence2 +ERADCFASHRDDNPGFPLMVRPPVDELCASYQADAQMFAGKYLYEVARRYPYFYAPELLY +YAQKLLYKDALAEC-----CSAADKAACLTPKIDDLKESVMTSGAKQRFKCAGIEKFGER +AFKAWAVARLSQKFPNADFAEISKIVTDLTKINKECCHGDLLECADDRVELGKYMCDNKD +SISSKLGKCCEKPLLEKGHCIAELERDDMPADLSPIEADFVEDKEVCK--NYAEAKDVFL +GTFLYELSRRHPEYSVVMLLRLAKGYEAVLEKCCATGDPPACYAKVFDELKPLIDEPQ-- +NLVKHNCELYGNLQEYGFQNALLIRYTKKMPQ---PTLVEVSRNLGKVGTKCCSLAEGER +>TestSequence3 +ERNECFLSHKDDSPDLPKL-KPDPNTLCDEFKADEKKFWGKYLYEIARRHPYFYAPELLY +YANK--YNGVFQEC-----CQAEDKGACLLPKIETMREKVLTSSARQRLRCASIQKFGER +ALKAWSVARLSQKFPKAEFVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQD +TISSKLKECCDKPLLEKSHCIAEVEKDAVPENLPPLTADFAEDKDVCK--NYQEAKDAFL +GSFLYEYSRRHPE-YVLLRLALLKEYEATLEECCAKDDPHACYSTVFDKLKHLVDEPQ-- +NLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVSRSLGKVGTRCCTKPESER +>TestSequence4 +ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF +FAKR--YKAAFTEC-----CQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGER +AFKAWAVARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQD +SISSKLKECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCK--NYAEAKDVFL +GMFLYEYARRHPRESVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQLL +NLIKQNCELFEQLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKR +>TestSequence5 +ERNECFLQHKDDNPGFGQLVTPEADAMCTAFHENEQRFLGKYLYEIARRHPYFYAPELLY +YAEE--YKGVFTEC-----CEAADKAACLTPKVDALREKVLASSAKERLKCASLQKFGER +AFKAWSVARLSQKFPKAEFAEISKLVTDLAKIHKECCHGDLLECADDRADLAKYICENQD +SISTKLKECCGKPVLEKSHCISEVERDELPADLPPLAVDFVEDKEVCK--NYQEAKDVFL +GTFLYEYSRRHPS-SVSLLLRLAKEYEATLEKCCATDDPPACYAHVFDEFKPLVEEPH-- 
+NLVKTNCELFEKLGEYGFQNALLVRYTKKVPQVSTPTLVEVSRSLGKVGSKCCTHPEAER \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/aln_out.fas Tue Mar 15 06:07:50 2016 -0400 @@ -0,0 +1,10 @@ +>TestSequence1 +ERNCFHDDPPCNFNYLYEARRPYFYPELLYKECCADKCLPKSARCAKFGERAKAWARLSQFPKAFKVTDLKHECCHGDLLECADDRLKY-DSKLCCKPLEKHCIEEDPDLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPKLKCLYFQALRYTKPQPTLSLNGCCER +>TestSequence5 +ERNCFHDDPPCNFKYLYEARRPYFYPELLYKECCADKCLPKSARCAKFGERAKAWARLSQFPKAFKVTDLKHECCHGDLLECADDRLKYNDSKLCCKPLEKHCIEEDPDLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPNLKCLYFQALRYTKPQPTLSLKGCCER +>TestSequence2 +ERACFHDDPPCDFKYLYEARRPYFYPELLYKECCADKCLPKSARCAKFGERAKAWARLSQFPNAFKVTDLKNECCHGDLLECADDRLKYNDSKLCCKPLEKHCIEEDPDLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPNLKCLYFQALRYTKPQPTLSLKGCCER +>TestSequence3 +ERNCFHDDPPCDFKYLYEARRPYFYPELLYNECCADKCLPKSARCAKFGERAKAWARLSQFPKAFKVTDLKHECCHGDLLECADDRLKYNDSKLCCKPLEKHCIEEDPNLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPNLKCLYFQALRYTKPQPTLSLKGCCER +>TestSequence4 +ERNCFHDDPPCNFKYLYEARRPYFYPELLYKECCADKCLPKSARCAKFGERAKAWARLSQFPKAFKVTDLKHECCHGDLLECADDRLKYNDSKLCCKPLEKHCIEEDPDLDFEKVCKYEAKDFLGFLYERRHPVLKYELECCADCYVFDLEPNLKCLYFQALRYTKPQPTLSLKGCCER
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Mar 15 06:07:50 2016 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> <!-- Declares the noisy 1.5.12 package and references the Tool Shed repository and changeset revision listed below for it --> + <package name="noisy" version="1.5.12"> + <repository changeset_revision="289d9da141b8" name="package_noisy_1_5_12" owner="dcorreia" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>