Mercurial > repos > earlhaminst > ete
changeset 3:077021c45b96 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
author | earlhaminst |
---|---|
date | Mon, 12 Mar 2018 12:51:48 -0400 |
parents | 03c10736e497 |
children | 87b6de3ef63e |
files | ete_genetree_splitter.py ete_genetree_splitter.xml ete_init_taxdb.xml ete_lineage_generator.xml ete_mod.xml ete_species_tree_generator.xml test-data/11_genetree.nhx test-data/12_genetree.nhx test-data/13_genetree.nhx test-data/14_genetree.nhx test-data/21_genetree.nhx test-data/22_genetree.nhx test-data/23_genetree.nhx test-data/24_genetree.nhx test-data/25_genetree.nhx test-data/genetree.nhx test-data/speciestree.nhx |
diffstat | 17 files changed, 247 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ete_genetree_splitter.py Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,56 @@
"""Split a gene tree into sub-trees at its duplication nodes (ETE Toolkit).

Reads a single gene tree, optionally reconciles it with a species tree, and
writes every sub-tree produced by ``split_by_dups()`` to its own
``<n>_genetree.nhx`` file in the current working directory.
"""
from __future__ import print_function

import optparse


def main():
    """Parse the command line, split the gene tree and write the sub-trees.

    Options:
        --genetree        path of the gene tree (required).
        --speciestree     path of the species tree (required with --gainlose).
        --species_format  ETE format code used to read the species tree.
        --gene_node       0 = leaf names are ``gene_species``; any other value
                          leaves the PhyloTree default naming function as is.
        --gainlose        reconcile the gene tree with the species tree first.
        --output_format   ETE format code used to write each sub-tree.

    A missing required option is reported through ``parser.error``, which
    prints the usage message and exits with status 2.
    """
    # Imported here rather than at module level so that the pure helper
    # parse_sp_name stays importable (and testable) without ete3 installed.
    from ete3 import PhyloTree

    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    parser.add_option('--speciestree', help='Species Tree in nhx format')
    parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)')
    parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene')
    parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose')
    parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)')
    options, args = parser.parse_args()

    if options.genetree is None:
        parser.error("--genetree option must be specified, GeneTree in nhx format")

    # Reads the single gene tree.
    genetree = PhyloTree(options.genetree)

    # Sets the species naming function for ``gene_species`` leaf names.
    if options.gene_node == 0:
        genetree.set_species_naming_function(parse_sp_name)

    # Reconcile the species tree with the gene tree to help find out gene
    # gain/lose.
    if options.gainlose:

        if options.speciestree is None:
            parser.error("--speciestree option must be specified, species tree in nhx format")

        # Reads the species tree.
        speciestree = PhyloTree(options.speciestree, format=options.species_format)

        # Removes '*' from species names that come from a species tree
        # configured for TreeBest.
        for leaf in speciestree:
            leaf.name = leaf.name.strip('*')

        # Only the reconciled tree is needed; the event list is not used.
        genetree, _events = genetree.reconcile(speciestree)

    # split_by_dups() returns the list of all sub-trees resulting from
    # splitting the current tree by its duplication nodes; they are numbered
    # from 1 and each is written to its own file.
    for cluster_id, node in enumerate(genetree.split_by_dups(), 1):
        outfile = str(cluster_id) + '_genetree.nhx'
        with open(outfile, 'w') as f:
            f.write(node.write(format=options.output_format))


def parse_sp_name(node_name):
    """Return the species part of a ``gene_species`` leaf name.

    The species is taken to be everything after the last underscore, so gene
    identifiers that themselves contain underscores are handled. A name
    without any underscore is returned unchanged instead of raising
    ``IndexError``.
    """
    return node_name.rsplit("_", 1)[-1]


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ete_genetree_splitter.xml Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,123 @@ +<tool id="ete_genetree_splitter" name="ETE GeneTree splitter" version="@VERSION@.1"> + <description>from a genetree using the ETE Toolkit</description> + <macros> + <import>ete_macros.xml</import> + </macros> + <expand macro="requirements" /> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + </stdio> + <command><![CDATA[ +python '$__tool_directory__/ete_genetree_splitter.py' +--genetree '$genetreeFile' +--gene_node $gene_node +#if $gainlose_conditional.gainlose == "True" + --speciestree '$gainlose_conditional.speciesFile' + --species_format $gainlose_conditional.species_format + --gainlose +#end if +--output_format $output_format + ]]></command> + <inputs> + <param name="genetreeFile" type="data" format="nhx" label="GeneTree file" help="GeneTree in nhx format" /> + <param name="gene_node" type="select" label="Select Gene node format" help="Select Gene node format from one of the option"> + <option value="0" selected="true">gene_species</option> + <option value="1">species_gene</option> + </param> + <conditional name="gainlose_conditional"> + <param name="gainlose" type="select" label="Find out gene gain/lose"> + <option value="True">Yes</option> + <option value="False" selected="true">No</option> + </param> + <when value="False" /> + <when value="True"> + <param name="speciesFile" type="data" format="nhx" label="Species file" help="Species Tree in nhx format" /> + <param name="species_format" type="select" label="Input species tree format" help="See table below"> + <option value="0">Flexible with support values (0)</option> + <option value="1">Flexible with internal node names (1)</option> + <option value="2">All branches + leaf names + internal supports (2)</option> + <option value="3">All branches + all names (3)</option> + <option value="4">Leaf branches + leaf names (4)</option> + <option 
value="5">Internal and leaf branches + leaf names (5)</option> + <option value="6">Internal branches + leaf names (6)</option> + <option value="7">Leaf branches + all names (7)</option> + <option value="8" selected="true">All names (8)</option> + <option value="9">Leaf names (9)</option> + <option value="100">Topology only (100)</option> + </param> + </when> + </conditional> + + <param name="output_format" type="select" label="Output GeneTree format" help="See table below"> + <option value="0">Flexible with support values (0)</option> + <option value="1">Flexible with internal node names (1)</option> + <option value="2">All branches + leaf names + internal supports (2)</option> + <option value="3">All branches + all names (3)</option> + <option value="4">Leaf branches + leaf names (4)</option> + <option value="5">Internal and leaf branches + leaf names (5)</option> + <option value="6">Internal branches + leaf names (6)</option> + <option value="7">Leaf branches + all names (7)</option> + <option value="8">All names (8)</option> + <option value="9" selected="true">Leaf names (9)</option> + <option value="100">Topology only (100)</option> + </param> + </inputs> + <outputs> + <collection name="genetrees_lists" type="list" label="${tool.name} on ${on_string}"> + <discover_datasets pattern="(?P<designation>.+)_genetree\.nhx" ext="nhx" /> + </collection> + </outputs> + <tests> + <test> + <param name="genetreeFile" ftype="nhx" value="genetree.nhx" /> + <param name="gene_node" value="0"/> + <param name="output_format" value="9" /> + <output_collection name="genetrees_lists" type="list" count="4"> + <element name="1" file="11_genetree.nhx" ftype="nhx" /> + <element name="2" file="12_genetree.nhx" ftype="nhx" /> + <element name="3" file="13_genetree.nhx" ftype="nhx" /> + <element name="4" file="14_genetree.nhx" ftype="nhx" /> + </output_collection> + </test> + <test> + <param name="genetreeFile" ftype="nhx" value="genetree.nhx" /> + <param name="gene_node" value="0"/> + 
<param name="gainlose" value="True" /> + <param name="speciesFile" ftype="nhx" value="speciestree.nhx" /> + <param name="species_format" value="8" /> + <param name="output_format" value="9" /> + <output_collection name="genetrees_lists" type="list" count="5"> + <element name="1" file="21_genetree.nhx" ftype="nhx" /> + <element name="2" file="22_genetree.nhx" ftype="nhx" /> + <element name="3" file="23_genetree.nhx" ftype="nhx" /> + <element name="4" file="24_genetree.nhx" ftype="nhx" /> + <element name="5" file="25_genetree.nhx" ftype="nhx" /> + </output_collection> + </test> + </tests> + <help><![CDATA[ +Split GeneTrees from single GeneTree by duplication event using the `ETE Toolkit`_. + +.. _ETE Toolkit: http://etetoolkit.org/ + +**Output format:** + +======= ============================================= ======================================================================================== +FORMAT DESCRIPTION SAMPLE +------- --------------------------------------------- ---------------------------------------------------------------------------------------- +0 flexible with support values ((D:0.723274,F:0.567784)1.000000:0.067192,(B:0.279326,H:0.756049)1.000000:0.807788); +1 flexible with internal node names ((D:0.723274,F:0.567784)E:0.067192,(B:0.279326,H:0.756049)B:0.807788); +2 all branches + leaf names + internal supports ((D:0.723274,F:0.567784)1.000000:0.067192,(B:0.279326,H:0.756049)1.000000:0.807788); +3 all branches + all names ((D:0.723274,F:0.567784)E:0.067192,(B:0.279326,H:0.756049)B:0.807788); +4 leaf branches + leaf names ((D:0.723274,F:0.567784),(B:0.279326,H:0.756049)); +5 internal and leaf branches + leaf names ((D:0.723274,F:0.567784):0.067192,(B:0.279326,H:0.756049):0.807788); +6 internal branches + leaf names ((D,F):0.067192,(B,H):0.807788); +7 leaf branches + all names ((D:0.723274,F:0.567784)E,(B:0.279326,H:0.756049)B); +8 all names ((D,F)E,(B,H)B); +9 leaf names ((D,F),(B,H)); +100 topology only ((,),(,)); +======= 
============================================= ======================================================================================== + ]]></help> + <expand macro="citations" /> +</tool>
--- a/ete_init_taxdb.xml Tue Nov 07 11:45:13 2017 -0500 +++ b/ete_init_taxdb.xml Mon Mar 12 12:51:48 2018 -0400 @@ -34,7 +34,9 @@ </test> </tests> <help><![CDATA[ -Generates the ETE sqlite data base from the NCBI taxdump.tar.gz using the .. _ETE Toolkit: http://etetoolkit.org/ +Generates the ETE sqlite data base from the NCBI taxdump.tar.gz using the `ETE Toolkit`_. + +.. _ETE Toolkit: http://etetoolkit.org/ **Input**
--- a/ete_lineage_generator.xml Tue Nov 07 11:45:13 2017 -0500 +++ b/ete_lineage_generator.xml Mon Mar 12 12:51:48 2018 -0400 @@ -115,7 +115,9 @@ </test> </tests> <help><![CDATA[ -Generates a table with lineage information for a list of species (also taxids and arbitrary taxons are accepted) using the .. _ETE Toolkit: http://etetoolkit.org/ +Generates a table with lineage information for a list of species (also taxids and arbitrary taxons are accepted) using the `ETE Toolkit`_. + +.. _ETE Toolkit: http://etetoolkit.org/ **Input**
--- a/ete_mod.xml Tue Nov 07 11:45:13 2017 -0500 +++ b/ete_mod.xml Mon Mar 12 12:51:48 2018 -0400 @@ -102,7 +102,9 @@ </test> </tests> <help><![CDATA[ -Modify a given input tree using the etetoolkit: http://etetoolkit.org/ +Modify a given input tree using the `ETE Toolkit`_. + +.. _ETE Toolkit: http://etetoolkit.org/ ]]></help> <expand macro="citations" /> </tool>
--- a/ete_species_tree_generator.xml Tue Nov 07 11:45:13 2017 -0500 +++ b/ete_species_tree_generator.xml Mon Mar 12 12:51:48 2018 -0400 @@ -8,8 +8,7 @@ <!-- Anything other than zero is an error --> <exit_code range="1:" /> </stdio> - <command> - <![CDATA[ + <command><![CDATA[ python '$__tool_directory__/ete_species_tree_generator.py' -s '$speciesFile' -d $database @@ -20,8 +19,7 @@ -f ${output_format.format_selector} #end if -t $output_format.treebest - ]]> - </command> + ]]></command> <inputs> <param name="speciesFile" type="data" format="txt" label="Species file" help="List with one species per line" /> <param name="database" type="data" format="sqlite" label="(ETE3) Taxonomy Database" help="The sqlite formatted Taxonomy used by ETE3 (which is derived from NCBI taxonomy)" /> @@ -64,8 +62,7 @@ </output> </test> </tests> - <help> - <![CDATA[ + <help><![CDATA[ Generate a species tree from a list of species using the `ETE Toolkit`_. .. _ETE Toolkit: http://etetoolkit.org/ @@ -87,7 +84,6 @@ 9 leaf names ((D,F),(B,H)); 100 topology only ((,),(,)); ======= ============================================= ======================================================================================== - ]]> - </help> + ]]></help> <expand macro="citations" /> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/11_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((insr_rattusnorvegicus,insr_musmusculus),(insr_homosapiens,insr_pantroglodytes)),insr_susscrofa); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/12_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +((maob_rattusnorvegicus,maob_musmusculus),((maob_homosapiens,maob_pantroglodytes),(maob_susscrofa,maob_canisfamiliaris))); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/13_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((maoa_rattusnorvegicus,maoa_musmusculus),(maoa_homosapiens,maoa_pantroglodytes)),(maoa_susscrofa,maoa_canisfamiliaris)); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/14_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((brat1_rattusnorvegicus,brat1_musmusculus),(brat1_homosapiens,brat1_pantroglodytes)),(brat1_susscrofa,brat1_canisfamiliaris)); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/21_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +((canisfamiliaris,insr_susscrofa),((insr_rattusnorvegicus,insr_musmusculus),(insr_homosapiens,insr_pantroglodytes))); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/22_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((pantroglodytes,homosapiens),(maob_rattusnorvegicus,maob_musmusculus)),(canisfamiliaris,susscrofa)); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/23_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((rattusnorvegicus,musmusculus),(maob_homosapiens,maob_pantroglodytes)),(maob_susscrofa,maob_canisfamiliaris)); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/24_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((maoa_rattusnorvegicus,maoa_musmusculus),(maoa_homosapiens,maoa_pantroglodytes)),(maoa_susscrofa,maoa_canisfamiliaris)); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/25_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((brat1_rattusnorvegicus,brat1_musmusculus),(brat1_homosapiens,brat1_pantroglodytes)),(brat1_susscrofa,brat1_canisfamiliaris)); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,45 @@ +((((insr_rattusnorvegicus:0.028171[&&NHX:S=rtn], +insr_musmusculus:0.02455[&&NHX:S=mms] +):0.088469[&&NHX:D=N:S=39107:T=31:B=100], +(insr_homosapiens:0.001221[&&NHX:S=hms], +insr_pantroglodytes:0.005279[&&NHX:S=ptr] +):0.043136[&&NHX:D=N:S=207598:T=31:B=100] +):0.014125[&&NHX:D=N:S=314146:T=5:B=13], +insr_susscrofa:0.051479[&&NHX:E=$-canisfamiliaris:S=ssf] +):6.45887[&&NHX:D=N:S=root:T=5:B=13], +((((maob_rattusnorvegicus:0.032474[&&NHX:S=rtn], +maob_musmusculus:0.026249[&&NHX:S=mms] +):0.066927[&&NHX:D=N:E=$-207598-314145:S=39107:T=31:B=100], +((maob_homosapiens:0.003734[&&NHX:S=hms], +maob_pantroglodytes:0.003601[&&NHX:S=ptr] +):0.03495[&&NHX:D=N:E=$-39107:S=207598:T=31:B=100], +(maob_susscrofa:0.074597[&&NHX:S=ssf], +maob_canisfamiliaris:0.041979[&&NHX:S=cfs] +):0.016661[&&NHX:D=N:S=314145:T=17:B=67] +):0.030385[&&NHX:D=N:S=root:T=17:B=66] +):0.144506[&&NHX:D=Y:SIS=0:DCS=0.0000:DD=Y:S=root:T=27:B=99], +(((maoa_rattusnorvegicus:0.02415[&&NHX:S=rtn], +maoa_musmusculus:0.020631[&&NHX:S=mms] +):0.113773[&&NHX:D=N:S=39107:T=31:B=100], +(maoa_homosapiens:0.005275[&&NHX:S=hms], +maoa_pantroglodytes:0.003923[&&NHX:S=ptr] +):0.053818[&&NHX:D=N:S=207598:T=31:B=100] +):0.029482[&&NHX:D=N:S=314146:T=5:B=49], +(maoa_susscrofa:0.080747[&&NHX:S=ssf], +maoa_canisfamiliaris:0.074906[&&NHX:S=cfs] +):0.010572[&&NHX:D=N:S=314145:T=23:B=100] +):0.227145[&&NHX:D=N:S=root:T=5:B=49] +):4.96729[&&NHX:D=Y:SIS=100:DCS=1.0000:S=root:T=31:B=100], +(((brat1_rattusnorvegicus:0.027406[&&NHX:S=rtn], +brat1_musmusculus:0.035144[&&NHX:S=mms] +):0.174589[&&NHX:D=N:S=39107:T=31:B=100], +(brat1_homosapiens:0.003983[&&NHX:S=hms], +brat1_pantroglodytes:0.009203[&&NHX:S=ptr] +):0.096543[&&NHX:D=N:S=207598:T=31:B=100] +):0.034504[&&NHX:D=N:S=314146:T=5:B=21], +(brat1_susscrofa:0.106917[&&NHX:S=ssf], +brat1_canisfamiliaris:0.098716[&&NHX:S=cfs] 
+):0.029758[&&NHX:D=N:S=314145:T=31:B=100] +):5.24228[&&NHX:D=N:S=root:T=5:B=21] +):1.10311[&&NHX:D=Y:SIS=100:DCS=1.0000:S=root:T=29:B=100] +)[&&NHX:D=Y:SIS=83:DCS=0.8333:S=root:B=0];