Mercurial > repos > gandres > fastme
changeset 4:aea74cec21e0 draft default tip
Uploaded
author | dcorreia |
---|---|
date | Fri, 22 Apr 2016 08:04:53 -0400 |
parents | df98a0425c8d |
children | |
files | fastme.xml |
diffstat | 1 files changed, 289 insertions(+), 181 deletions(-) [+] |
line wrap: on
line diff
--- a/fastme.xml Fri Dec 11 09:51:04 2015 -0500 +++ b/fastme.xml Fri Apr 22 08:04:53 2016 -0400 @@ -1,193 +1,201 @@ -<tool id="sniplay_fastme" name="Fastme" version="1.1.0"> - - <!-- [REQUIRED] Tool description displayed after the tool name --> - <description> Calculate distance tree for an alignment file</description> - - <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> +<tool id="fastme" name="FastME" version="2.1.4.2"> + <description>Distance-based inference of phylogenetic trees</description> <requirements> - <requirement type="binary">perl</requirement> - <requirement type="package" version="2.1.4">fastme</requirement> + <requirement type="package" version="2.1.4">fastme</requirement> </requirements> - - <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <version_command> + <![CDATA[ fastme --version ]]> + </version_command> <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> <exit_code range="1:" level="fatal" /> </stdio> + <command>fastme --input_data=$input + --output_tree=$outputTree + --output_matrix=$outputMatrix + --output_info=$outputLog + --nb_threads=\${GALAXY_SLOTS:-1} - - <!-- [REQUIRED] The command to execute --> - <command> - fastme --input_data=$input_data --dna=$model --output_tree=$fileout - #if str( $distance ) == "SPR": - --spr - #elif str( $distance )[:3] == "NNI" : - #if str( $distance ) == "NNI_B" : - --nni=B - #else : - --nni=O - #end if + #if $typeChoice.datatype =="d" + --dna=$typeChoice.modeldna + + #else if $typeChoice.datatype =="p" + --protein=$typeChoice.modelprot + + #else if $typeChoice.datatype =="cfg": + ## Read information of sequence type + ## read an info file to choose which option set and set a model by default + #set $info = open( str($input_info) ).read() + #if 'dna' in $info: + --dna=T + #else if 'protein' in $info: + --protein=L + #end if + #end if + + #if $gammaChoice.gamma == "true" + --gamma=$gammaChoice.rate + #end if 
+ + $distance + $equilibrium + $removeGap + $treeRefinement + + #if $bootChoice.boot == "true" + --output_boot=$outputBoostrap + --bootstrap=$bootChoice.replicates + #end if + + >> tmp_stdout; + cat tmp_stdout > $outputLog; - #else : - --method=$distance - #end if - > $fileout_log </command> - - - - <!-- [REQUIRED] Input files and tool parameters --> <inputs> - <param name="input_data" type="data" format="txt" optional="false" label="Phylip input" /> - <param name="fileout_label" type="text" value="Newick tree" label="Output name" help="Output name for files" /> - <param name="model" type="select" label="Evolutionary model" > - <option value="p">p-distance</option> - <option value="Y">RY symetric</option> - <option value="R">RY</option> - <option value="J">JC69</option> - <option value="K">K2P</option> - <option value="1">F81</option> - <option value="4" selected="true">F84</option> - <option value="T">TN93</option> - <option value="L">LogDet</option> + <param name="input" type="data" format="phylip, phy" label="Fastme input" help="Phylip Alignment or Matrix file"/> + <param name="fileout_label" type="text" value="Newick tree" label="Output name" help="Output name for files" /> + <conditional name="typeChoice"> + <param name="datatype" type="select" display="radio" label="Data type" > + <option value="d">DNA</option> + <option value="p">Protein</option> + <option value="m">Matrix</option> + <option value="cfg">Config file</option> + </param> + <when value="d"> + <param name="modeldna" type="select" label="Evolutionary model"> + <option value="4">F84</option> + <option value="R">RY</option> + <option value="1">F81</option> + <option value="J">JC69</option> + <option value="K">K2P</option> + <option value="T" selected="true">TN93</option> + <option value="p">p-distance</option> + </param> + </when> + <when value="p"> + <param name="modelprot" type="select" label="Evolutionary model"> + <option value="L">LG</option> + <option value="W">WAG</option> + <option 
value="J">JTT</option> + <option value="h">Dayhoff</option> + <option value="C">CpRev</option> + <option value="D">DCMut</option> + <option value="b">HIVb</option> + <option value="I">HIVw</option> + <option value="M">MtREV</option> + <option value="R">RtREV</option> + <option value="p">p-distance</option> + </param> + </when> + <when value="m"/> + <when value="cfg"> + <param name="input_info" format="txt" type="data" multiple="false" label="Config file" help="Precomputed file containing sequence description (dna or protein)"/> + </when> + </conditional> + <param name="equilibrium" type="boolean" label="Equilibrium frequencies" truevalue="--equilibrium" falsevalue="" help="By default, frequencies are globally counted from the nucleotides alignment or defined by the protein substitution model. By checking the box, frequencies are pairwise estimated by counting the nucleotides or estimated by counting the amino-acids in the alignment."/> + <conditional name="gammaChoice"> + <param name="gamma" type="select" label="Gamma distributed rates across sites"> + <option value="true">Yes</option> + <option value="false">No</option> + </param> + <when value="true"> + <param name="rate" type="float" label="Gamma distribution parameter" value="1" min="0"/> + </when> + <when value="false" /> + </conditional> + <param name="removeGap" type="select" display="radio" label="Remove gap strategy"> + <option value="">Pairwise deletion of gaps</option> + <option value="--remove_gap">Remove all sites with gap</option> </param> - <param name="distance" type="select" label="Distance methode" > - <option value="B">TaxAdd_BalME</option> - <option value="O">TaxAdd_OLSME</option> - <option value="I" selected="true">BIONJ</option> - <option value="N">NJ</option> - <option value="U">UNJ</option> - <option value="NNI_B">NNI_BalME</option> - <option value="NNI_O">NNI_OLS</option> - <option value="SPR">SPR</option> + <param name="distance" type="select" label="Distance algorithm"> + <option 
value="--method=I">BIONJ</option> + <option value="--method=B --branch_length=B">TaxAdd BalME</option> + <option value="--method=O --branch_length=O">TaxAdd OLSME</option> + <option value="--method=N">NJ</option> + <option value="--method=U">UNJ</option> + </param> + <param name="treeRefinement" type="select" display="radio" label="Tree Refinement" help='(NNI) Nearest Neighbor Interchanges,(SPR) Subtree Pruning and Regrafting '> + <option value="">None</option> + <option value="--nni=O">OLS NNI</option> + <option value="--nni=B">BalME NNI</option> + <option value="--spr" selected="true">BalME SPR</option> + <option value="--nni=B --spr">BalME NNI + SPR</option> </param> - </inputs> - - <!-- [REQUIRED] Output files --> + <conditional name="bootChoice"> + <param name="boot" type="select" label="Bootstrap"> + <option value="false">No</option> + <option value="true">Yes</option> + </param> + <when value="false"/> + <when value="true"> + <param name="replicates" type="integer" value="1000" label="Number of replicates"/> + </when> + </conditional> + </inputs> <outputs> - <data name="fileout_log" type="data" format="txt" label="${fileout_label}.log" /> - <data name="fileout" type="data" format="txt" label="${fileout_label}" /> + <data name="outputTree" format="nwk" label="${fileout_label}"/> + <data name="outputLog" format="txt" label="FastME Information"/> + <data name="outputBoostrap" format="nwk" label="FastME Bootstrap trees"> + <filter>bootChoice['boot'] == "true"</filter> + </data> + <data name="outputMatrix" format="txt" label="FastME Distance matrix"/> </outputs> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <test> - <param name="input_data" value="phylip" /> - <param name="model" value="4"/> - <param name="distance" value="I"/> - <output name="fileout" file="newick" /> + <param name="input" value="phylip" /> + <param name="modeldna" value="4"/> + <param 
name="gamma" value="false"/> + <param name="treeRefinement" value=""/> + <param name="distance" value="--method=I"/> + <output name="outputTree" file="newick" /> </test> - - <!-- [HELP] Multiple tests can be defined with different parameters --> -<!-- <test> + <param name="input" value="phylip" /> + <param name="modeldna" value="T"/> + <param name="distance" value="--method=I"/> + <param name="treeRefinement" value="--nni=B --spr"/> + <param name="fileout_label" value="TreeTn93BionjSpr.nwk" /> + <output name="outputTree" file="TreeTn93BionjSpr.nwk" /> </test> ---> </tests> - - <!-- [OPTIONAL] Help displayed in Galaxy --> <help> .. class:: infomark -**Authors** - | Richard Desper and Olivier Gascuel, - | Journal of Computational Biology 19(5), 687-705, 2002. - | Molecular Biology and Evolution 21(3), 587-598, 2004. - | Please cite these papers if you use this software in your publications. +**FastME version 2.1.4** .. class:: infomark -**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. -.. class:: infomark - -**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr - ---------------------------------------------------- +**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. Correia Damien, CNRS. +**Support** For any questions about Galaxy integration, please send an e-mail to vincent.lefort@lirmm.fr -====== -FastMe -====== +----- + + +############ +FastME 2.0 +############ + +Comprehensive, accurate and fast distance-based phylogeny inference program + ----------- Description ----------- - - FastME - A distance based phylogeny reconstruction algorithm. - - FastME showed better topological accuracy than NJ, - BIONJ, WEIGHBOR and FITCH, in all evolutionary - conditions we tested, which include large range - deviations from molecular clock and substitution rates. 
- - - ------------------ -Workflow position ------------------ - -**Upstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -Readseq phylip conversion phylip -=========== ========================== ======= - - -**Downstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -Rooting out tree Newick -=========== ========================== ======= +FastME provides distance algorithms to infer phylogenies. It's based on balanced minimum evolution, which is the very principle of NJ. - ----------- -Input file ----------- - -Phylip file - Phylip file with sequence alignments - - ----------- -Parameters ----------- - -Output name - Output base name for the ouput files - -Evolutionary model - Indicate the evolutionary [model] which can be choosen from:p-distance, RY symmetric, RY, JC69, K2P, F81, F84 (default), TN93, LogDet. - -Distance methode - FastME computes a tree using a distance algorithm. You may choose this method/topologie from: TaxAdd_BalME, TaxAdd_OLSME, BIONJ (default), NJ, UNJ, NNI_BalME, NNI_OLS or SPR - ------------- -Output files ------------- - -Output_name - Resulting tree at Newick format - -Output_name.log - Log file +FastME included Nearest Neighbor Interchange (NNI) and also Subtree Pruning and Regrafting (SPR), while remaining as fast as NJ and providing a number of facilities: distance estimation for DNA and proteins with various models and options, bootstrapping, and parallel computations. 
------------ Dependencies ------------ FastME - http://www.atgc-montpellier.fr/fastme - + http://www.atgc-montpellier.fr/fastme + --------------------------------------------------- @@ -199,54 +207,154 @@ Input files =========== -Philip file ------------ +**Phylip file**:: + -:: + 4 120 + Orangutan CCAAACGACA TTTCATATGC TGTCATTTCT GAAGATATGA GACAAGTGAG CTGTCCGGAA + Gorilla CCAAACAACA TTTCATGTGC TGTCATTTCT GAAGATATGA GACAAGTGAG CTCTCCGAAA + Human CCAAACAACA TTTCATGTGC TGTCATTTCT GAAGATAAGA GACAAGTGAG CTCTCCGAAA + Chimp CCAAACAACA TTTCATGTGC TGTCATTTCT GAAGATATGA GACAAGTGAG CTCTCCGAAA - 168 5125 - IRAT112 GAGAACCGTC CTGTAAGTAC TCTTGCTTTA AGTAATAAAG TAATACTAAT - KARASUKARA GAGAACCGTC CTGTAAGTAC TCTTGCTTTA AATACGAAAG TAATACTAAT + CCAACATATC AGACATATGT GAATTTCAAT TATTGTACGG GCATCCTGGG CTCTCAAAGT + CCAAGATATC AGACATTTAT GAATTTCAAT TATTGTACGG GCATCCTGGG CTCTCAAAGT + CCAAGATACC AGACATTTGT GAATTTCAAT TATTGTACTG GCATCCTGGG CTCTCAAAGT + CCAAGATATC AGACATTTAT GAATTTCAAT TATTGTACTG GCATCCTGGG CTCTCAAAGT + + Parameters ========== -Output name -> Newick tree +:: -Evolutionary model -> F84 + Output name: Newick tree + Evolutionary model : TN93 + Distance method : BIONJ -Distance methode -> BIONJ Output files ============ -Newick tree ------------ +**Newick tree**:: + + ((Gorilla:0.005755,Orangutan:0.020680):0.001063,Human:0.006655,Chimp:0.002132); + +----- + +OPTIONS +======= + + + -i file, --input_data=file + The input data file contains sequence alignment(s) or a distance matrix(ces). + + -u input_tree_file, --user_tree=input_tree_file + FastME may use an existing topology available in the input user tree file which corresponds to the input dataset. + + -o output_tree_file, --output_tree=output_tree_file + FastME will write the infered tree into the output tree file. + + -O output_matrix_file, --output_matrix=output_matrix_file + Use output matrix file option if you want FastME to write the distances + matrix computed from the input alignment in the output matrix file. 
+ + -I output_information_file, --output_info=output_information_file + Use this option if you want FastME to write information + about its execution in the output information file. + + -B output_bootstrap_trees_file, --output_boot=output_bootstrap_trees_file + Use this option if you want FastME to write bootstrap trees + in the bootstrap trees file. + + -a, --append + Use this option to append results to existing output files (if any). + By default output files will be overwritten. + + -m method, --method=method + FastME computes a tree using a distance algorithm. + You may choose this method from: + TaxAdd_(B)alME, TaxAdd_(O)LSME, B(I)ONJ (default), + (N)J or (U)NJ. + + -d model, --dna=model + Use this option if your input data file contains DNA sequences alignment(s). + You may also indicate the evolutionary [model] which can be chosen from: + (p)-distance, R(Y) symmetric, (R)Y, (J)C69, (K)2P, F8(1), F8(4) (default), (T)N93, (L)ogDet. + + -p model, --protein=model + Use this option if your input data file contains protein sequences alignment(s). + You may also indicate the evolutionary [model] which can be chosen from: + (p)-distance, (F)81 like, (L)G (default), (W)AG, (J)TT, Day(h)off, (D)CMut, (C)pRev, + (M)tREV, (R)tREV, HIV(b), H(I)Vw or FL(U). + + -r, --remove_gap + Use this option to completely remove any site which has a gap in + any sequence. By default, FastME is doing pairwise deletion of gaps. -:: + -e, --equilibrium + The equilibrium frequencies for DNA are always estimated by counting + the occurrence of the nucleotides in the input alignment. + For amino-acid sequences, the equilibrium frequencies are estimated + using the frequencies defined by the substitution model. + Use this option if you wish to estimate the amino-acid frequencies + by counting their occurrence in the input alignment. + + -g alpha, --gamma=alpha + Use this option if you wish to have gamma distributed rates across sites. + By default, FastME runs with no gamma variation. 
+ If running FastME with gamma distributed rates across sites, the [alpha] default value is 1.0. + Only helpful when the input data file contains sequences alignment(s). + + -n NNI, --nni=NNI + Use this option to do [NNI] tree topology improvement. + You may choose the [NNI] type from: + NNI_(B)alME (default) or NNI_(O)LS. + + -s, --spr + Use this option to do SPR tree topology improvement. + + -w branch, --branch_length=branch + Use this option to indicate the branch length to assign to the tree. + Only helpful when not improving the tree topology (no NNI nor SPR). + You may choose the branch length from: + (B)alLS (default), (O)LS or (n)one. (n)one is only available with BIONJ, NJ or UNJ. - (((((((((((((((((((((((((GOGOLEMPUK:0.001198,GOGOLEMPAK:0.002128):0.030378,TREMBESE:0.013258):0.055246,(((JIMBRUKJOL:0.045219,KETANKONIR:0.035298):0.006267, ... - + -D datasets, --datasets=datasets + Use this option to indicate the number of datasets in your input + data file. Default value is 1. + + -b replicates, --bootstrap=replicates + Use this option to indicate the number of replicates FastME will + do for bootstrapping. Default value is 0. + Only helpful when the input data file contains sequences alignment(s). + + -z seed, --seed=seed + Use this option to initialize randomization with seed value. Only helpful when bootstrapping. + + -c Use this option if you want FastME only to compute distance matrix. + Only helpful when the input data file contains sequences alignment(s). + + -T number_of_threads, --nb_threads=number_of_threads + Use this option to set the number of threads to use. + Default number of threads is 4. + + -v value, --verbose=value + Sets the verbose level to value [0-3]. Default value is 0. + + -V, --version + Prints the FastME version. + + -h, --help Display this usage. + + +For further information, please visit FastME_ + +.. 
_FastME: http://www.atgc-montpellier.fr/fastme/usersguide.php </help> - <citations> - <!-- [HELP] As DOI or BibTex entry --> - <citation type="bibtex"> -@article{Lefort30062015, -author = {Lefort, Vincent and Desper, Richard and Gascuel, Olivier}, -title = {FastME 2.0: A Comprehensive, Accurate, and Fast Distance-Based Phylogeny Inference Program}, -year = {2015}, -doi = {10.1093/molbev/msv150}, -abstract ={FastME provides distance algorithms to infer phylogenies. FastME is based on balanced minimum evolution, which is the very principle of Neighbor Joining (NJ). FastME improves over NJ by performing topological moves using fast, sophisticated algorithms. The first version of FastME only included Nearest Neighbor Interchange. The new 2.0 version also includes Subtree Pruning and Regrafting, while remaining as fast as NJ and providing a number of facilities: Distance estimation for DNA and proteins with various models and options, bootstrapping, and parallel computations. FastME is available using several interfaces: Command-line (to be integrated in pipelines), PHYLIP-like, and a Web server (http://www.atgc-montpellier.fr/fastme/).}, -URL = {http://mbe.oxfordjournals.org/content/early/2015/07/25/molbev.msv150.abstract}, -eprint = {http://mbe.oxfordjournals.org/content/early/2015/07/25/molbev.msv150.full.pdf+html}, -journal = {Molecular Biology and Evolution} -} - - </citation> - + <citation type="doi">10.1093/molbev/msv150</citation> + <citation type="doi">10.1089/106652702761034136</citation> </citations> - - -</tool> +</tool> \ No newline at end of file