view hyphy_prime.xml @ 8:1d9d3c6bc246 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit f6cabbd9eb2eea9f5d44fe9836cc5f6d6087a839"
author iuc
date Fri, 30 Oct 2020 16:26:08 +0000
parents 8f66e659530b
children 296f6bc03fa6
line wrap: on
line source

<?xml version="1.0"?>
<tool id="hyphy_prime" name="HyPhy-PRIME" version="@VERSION@+galaxy0" profile="19.09">
    <!-- Short summary shown alongside the tool name. -->
    <description>Property Informed Models of Evolution</description>
    <!-- Imports macros.xml; the macros and @TOKEN@ values referenced in this file are
         presumably defined there (not visible from this file, confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "requirements" macro; presumably declares the HyPhy dependency (verify in macros.xml). -->
    <expand macro="requirements"/>
    <!--
        Command template (Cheetah):
          1. Symlink the alignment and tree datasets to fixed file names in the working directory.
          2. Run the "prime" analysis through the @HYPHYMPI@ launcher token, passing the genetic code,
             branch selection, p-value threshold and property set chosen in the form. The result file
             is written to $prime_output and standard output is redirected to $prime_log.
          3. After the ";" separator, append whatever the @CATCH_MPIERR@ token expands to.
        NOTE(review): @HYPHYMPI@ and @CATCH_MPIERR@ are defined outside this file (presumably in
        macros.xml); confirm there how the exit status is handled after the ";" separator.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ln -s '$input_file' prime_input.fa &&
        ln -s '$input_nhx' prime_input.nhx &&
        @HYPHYMPI@ prime
            --alignment ./prime_input.fa
            --tree ./prime_input.nhx
            --code '$gencodeid'
            --branches '$branches'
            --pvalue '$p_value'
            --properties '$prop_set'
            --output '$prime_output'
            > '$prime_log'
            ;
        @CATCH_MPIERR@
    ]]></command>
    <inputs>
        <!-- These three groups of parameters are expanded from macros defined outside this file. -->
        <expand macro="inputs" />
        <expand macro="gencode" />
        <expand macro="branches" />
        <!-- Property set forwarded to the properties option of the command. -->
        <param
            name="prop_set"
            type="select"
            label="Biochemical properties to use">
            <option value="Atchley">Atchley</option>
            <option value="LCAP">LCAP</option>
        </param>
        <!-- Significance threshold forwarded to the pvalue option; restricted to the interval [0, 1]. -->
        <param
            name="p_value"
            type="float"
            min="0"
            max="1"
            value=".1"
            label="P-value threshold" />
    </inputs>
    <outputs>
        <!-- Explicit labels keep the two datasets distinguishable in the history; without them
             both would receive the same default name ("${tool.name} on ${on_string}"). -->
        <!-- Captured standard output of the run. -->
        <data name="prime_log" format="txt" label="${tool.name} on ${on_string}: log"/>
        <!-- Analysis results written by the command to its output file. -->
        <data name="prime_output" format="hyphy_results.json" label="${tool.name} on ${on_string}: results"/>
    </outputs>
    <tests>
        <!-- Atchley property set, all branches, threshold 0.1; the result file is compared by size only. -->
        <test>
            <param name="input_file" value="prime-in1.fa" ftype="fasta"/>
            <param name="input_nhx" value="prime-in1.nhx" ftype="nhx"/>
            <param name="prop_set" value="Atchley"/>
            <param name="branches" value="All"/>
            <param name="p_value" value="0.1"/>
            <output name="prime_output" compare="sim_size" file="prime-out1.json"/>
        </test>
    </tests>
    <help><![CDATA[
PRIME: Property Informed Model of Evolution
===========================================

What question does this method answer?
--------------------------------------

Does evolution at specific sites in a coding alignment preserve or alter some biochemical properties?

Recommended Applications
------------------------

Identify biochemical evolutionary constraints or changes with site level resolution: e.g. site 23 is
evolving to conserve residue polarity, but alter its volume.


Brief description
-----------------

Most methods of coding sequence analysis do not take direct account of the fact that
the rate at which amino-acids are exchanged differs depending on which amino-acids are involved.
While this seems obvious (e.g. radical changes should happen more slowly), there are many technical reasons
why the standard assumption of "one-rate for all residues" holds.


Given a set of N amino-acid properties, fit a site-level model where non-synonymous rates
depend on how much a non-synonymous substitution changes the properties
of the residue, beta(X,Y) = exp(log_omega - lambda_1 * diff_1(X,Y) -
lambda_2 * diff_2(X,Y) - ...). When lambda_k > 0, changes in property k
are disfavored, and when lambda_k < 0 they are promoted. At each site,
N+1 tests are performed (one for each property, and an omnibus test).

Input
-----

1. A *FASTA* sequence alignment.
2. A phylogenetic tree in the *Newick* format.

Note: the names of sequences in the alignment must match the names of the sequences in the tree.


Output
------

A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).

A custom visualization module for viewing these results will soon be available at http://vision.hyphy.org/

Further reading
---------------

http://hyphy.org/methods/selection-methods/#PRIME


Tool options
------------
::


    --code              Which genetic code to use

    --branches          Which branches should be tested for selection?
                            All [default] : test all branches

                            Internal : test only internal branches (suitable for
                            intra-host pathogen evolution for example, where terminal branches
                            may contain polymorphism data)

                            Leaves: test only terminal (leaf) branches

                            Unlabeled: if the Newick string is labeled using the {} notation,
                            test only branches without explicit labels
                            (see http://hyphy.org/tutorials/phylotree/)

    --pvalue            The p-value threshold used to determine statistical significance

    --properties        Which property set to use
                            Atchley : Use the five properties derived from a factor analysis of 500 amino-acid properties [Table 2 in PNAS (2005) 102(18) 6395-6400 doi: 10.1073/pnas.0408677102]
                            LCAP: Use the five properties defined in the Conant and Stadler LCAP model [Mol Biol Evol (2009) 26 (5): 1155-1161. doi: 10.1093/molbev/msp031]


    ]]></help>
    <expand macro="citations" />
</tool>