changeset 3:077021c45b96 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
author earlhaminst
date Mon, 12 Mar 2018 12:51:48 -0400
parents 03c10736e497
children 87b6de3ef63e
files ete_genetree_splitter.py ete_genetree_splitter.xml ete_init_taxdb.xml ete_lineage_generator.xml ete_mod.xml ete_species_tree_generator.xml test-data/11_genetree.nhx test-data/12_genetree.nhx test-data/13_genetree.nhx test-data/14_genetree.nhx test-data/21_genetree.nhx test-data/22_genetree.nhx test-data/23_genetree.nhx test-data/24_genetree.nhx test-data/25_genetree.nhx test-data/genetree.nhx test-data/speciestree.nhx
diffstat 17 files changed, 247 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ete_genetree_splitter.py	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,56 @@
+from __future__ import print_function
+
+import optparse
+
+from ete3 import PhyloTree
+
+
+def main():  # command-line entry point: parse options, load the gene tree, write one file per subtree
+    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option('--genetree', help='GeneTree in nhx format')
+    parser.add_option('--speciestree', help='Species Tree in nhx format')
+    parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)')
+    parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene')
+    parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose')
+    parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)')
+    options, args = parser.parse_args()  # args (positional arguments) is not used below
+
+    if options.genetree is None:
+        parser.error("--genetree option must be specified, GeneTree in nhx format")
+
+    # load the single gene tree passed via --genetree
+    genetree = PhyloTree(options.genetree)
+
+    # for gene_species node names (gene_node == 0), take the species from the node name with parse_sp_name; for any other value no naming function is set here
+    if options.gene_node == 0:
+        genetree.set_species_naming_function(parse_sp_name)
+
+    # reconcile the gene tree with the species tree to help find out gene gain/loss; genetree is replaced by the first value reconcile() returns
+    if options.gainlose:
+
+        if options.speciestree is None:
+            parser.error("--speciestree option must be specified, species tree in nhx format")
+
+        # read the species tree using the format number given by --species_format
+        speciestree = PhyloTree(options.speciestree, format=options.species_format)
+
+        # Remove '*' characters from both ends of the species names; they come from species trees configured for TreeBest
+        for leaf in speciestree:
+            leaf.name = leaf.name.strip('*')
+
+        genetree, events = genetree.reconcile(speciestree)  # events is not used below
+
+    # split_by_dups() returns the list of all subtrees obtained by splitting the current tree at its duplication nodes; each subtree is written to its own <cluster_id>_genetree.nhx file, with cluster_id counting from 1
+    for cluster_id, node in enumerate(genetree.split_by_dups(), 1):
+        outfile = str(cluster_id) + '_genetree.nhx'
+        with open(outfile, 'w') as f:
+            f.write(node.write(format=options.output_format))  # serialise the subtree in the format number given by --output_format
+
+
+def parse_sp_name(node_name):  # species naming function installed by main() when --gene_node is 0 (gene_species names)
+    return node_name.split("_")[1]  # second "_"-separated field only; raises IndexError when the name contains no "_"
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ete_genetree_splitter.xml	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,123 @@
+<tool id="ete_genetree_splitter" name="ETE GeneTree splitter" version="@VERSION@.1">
+    <description>from a genetree using the ETE Toolkit</description>
+    <macros>
+        <import>ete_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+python '$__tool_directory__/ete_genetree_splitter.py'
+--genetree '$genetreeFile'
+--gene_node $gene_node
+#if $gainlose_conditional.gainlose == "True"
+    --speciestree '$gainlose_conditional.speciesFile'
+    --species_format $gainlose_conditional.species_format
+    --gainlose
+#end if
+--output_format $output_format
+    ]]></command>
+    <inputs>
+        <param name="genetreeFile" type="data" format="nhx" label="GeneTree file" help="GeneTree in nhx format" />
+         <param name="gene_node" type="select" label="Select Gene node format" help="Select Gene node format from one of the options">
+            <option value="0" selected="true">gene_species</option>
+            <option value="1">species_gene</option>
+        </param>
+        <conditional name="gainlose_conditional">
+            <param name="gainlose" type="select" label="Find out gene gain/loss">
+                <option value="True">Yes</option>
+                <option value="False" selected="true">No</option>
+            </param>
+            <when value="False" />
+            <when value="True">
+                <param name="speciesFile" type="data" format="nhx" label="Species file" help="Species Tree in nhx format" />
+                <param name="species_format" type="select" label="Input species tree format" help="See table below">
+                    <option value="0">Flexible with support values (0)</option>
+                    <option value="1">Flexible with internal node names (1)</option>
+                    <option value="2">All branches + leaf names + internal supports (2)</option>
+                    <option value="3">All branches + all names (3)</option>
+                    <option value="4">Leaf branches + leaf names (4)</option>
+                    <option value="5">Internal and leaf branches + leaf names (5)</option>
+                    <option value="6">Internal branches + leaf names (6)</option>
+                    <option value="7">Leaf branches + all names (7)</option>
+                    <option value="8" selected="true">All names (8)</option>
+                    <option value="9">Leaf names (9)</option>
+                    <option value="100">Topology only (100)</option>
+                </param>
+             </when>
+        </conditional> 
+        
+        <param name="output_format" type="select" label="Output GeneTree format" help="See table below">
+            <option value="0">Flexible with support values (0)</option>
+            <option value="1">Flexible with internal node names (1)</option>
+            <option value="2">All branches + leaf names + internal supports (2)</option>
+            <option value="3">All branches + all names (3)</option>
+            <option value="4">Leaf branches + leaf names (4)</option>
+            <option value="5">Internal and leaf branches + leaf names (5)</option>
+            <option value="6">Internal branches + leaf names (6)</option>
+            <option value="7">Leaf branches + all names (7)</option>
+            <option value="8">All names (8)</option>
+            <option value="9" selected="true">Leaf names (9)</option>
+            <option value="100">Topology only (100)</option>
+        </param> 
+    </inputs>
+    <outputs>
+        <collection name="genetrees_lists" type="list" label="${tool.name} on ${on_string}">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_genetree\.nhx" ext="nhx" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="genetreeFile" ftype="nhx" value="genetree.nhx" />
+            <param name="gene_node" value="0"/>
+            <param name="output_format" value="9" />
+            <output_collection name="genetrees_lists" type="list" count="4">
+                <element name="1" file="11_genetree.nhx" ftype="nhx" />
+                <element name="2" file="12_genetree.nhx" ftype="nhx" />
+                <element name="3" file="13_genetree.nhx" ftype="nhx" />
+                <element name="4" file="14_genetree.nhx" ftype="nhx" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="genetreeFile" ftype="nhx" value="genetree.nhx" />
+            <param name="gene_node" value="0"/>
+            <param name="gainlose" value="True" />
+            <param name="speciesFile" ftype="nhx" value="speciestree.nhx" />
+            <param name="species_format" value="8" />
+            <param name="output_format" value="9" />
+            <output_collection name="genetrees_lists" type="list" count="5">
+                <element name="1" file="21_genetree.nhx" ftype="nhx" />
+                <element name="2" file="22_genetree.nhx" ftype="nhx" />
+                <element name="3" file="23_genetree.nhx" ftype="nhx" />
+                <element name="4" file="24_genetree.nhx" ftype="nhx" />
+                <element name="5" file="25_genetree.nhx" ftype="nhx" />
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Split a single GeneTree into multiple GeneTrees at its duplication events using the `ETE Toolkit`_.
+
+.. _ETE Toolkit: http://etetoolkit.org/
+
+**Output format:**
+
+======= ============================================= ========================================================================================
+FORMAT  DESCRIPTION                                     SAMPLE
+------- --------------------------------------------- ----------------------------------------------------------------------------------------
+0       flexible with support values                    ((D:0.723274,F:0.567784)1.000000:0.067192,(B:0.279326,H:0.756049)1.000000:0.807788);
+1       flexible with internal node names               ((D:0.723274,F:0.567784)E:0.067192,(B:0.279326,H:0.756049)B:0.807788);
+2       all branches + leaf names + internal supports   ((D:0.723274,F:0.567784)1.000000:0.067192,(B:0.279326,H:0.756049)1.000000:0.807788);
+3       all branches + all names                        ((D:0.723274,F:0.567784)E:0.067192,(B:0.279326,H:0.756049)B:0.807788);
+4       leaf branches + leaf names                      ((D:0.723274,F:0.567784),(B:0.279326,H:0.756049));
+5       internal and leaf branches + leaf names         ((D:0.723274,F:0.567784):0.067192,(B:0.279326,H:0.756049):0.807788);
+6       internal branches + leaf names                  ((D,F):0.067192,(B,H):0.807788);
+7       leaf branches + all names                       ((D:0.723274,F:0.567784)E,(B:0.279326,H:0.756049)B);
+8       all names                                       ((D,F)E,(B,H)B);
+9       leaf names                                      ((D,F),(B,H));
+100     topology only                                   ((,),(,));
+======= ============================================= ========================================================================================
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- a/ete_init_taxdb.xml	Tue Nov 07 11:45:13 2017 -0500
+++ b/ete_init_taxdb.xml	Mon Mar 12 12:51:48 2018 -0400
@@ -34,7 +34,9 @@
         </test>
     </tests>
     <help><![CDATA[
-Generates the ETE sqlite data base from the NCBI taxdump.tar.gz using the .. _ETE Toolkit: http://etetoolkit.org/
+Generates the ETE sqlite data base from the NCBI taxdump.tar.gz using the `ETE Toolkit`_.
+
+.. _ETE Toolkit: http://etetoolkit.org/
 
 **Input**
 
--- a/ete_lineage_generator.xml	Tue Nov 07 11:45:13 2017 -0500
+++ b/ete_lineage_generator.xml	Mon Mar 12 12:51:48 2018 -0400
@@ -115,7 +115,9 @@
         </test>
     </tests>
     <help><![CDATA[
-Generates a table with lineage information for a list of species (also taxids and arbitrary taxons are accepted) using the .. _ETE Toolkit: http://etetoolkit.org/
+Generates a table with lineage information for a list of species (also taxids and arbitrary taxons are accepted) using the `ETE Toolkit`_.
+
+.. _ETE Toolkit: http://etetoolkit.org/
 
 **Input**
 
--- a/ete_mod.xml	Tue Nov 07 11:45:13 2017 -0500
+++ b/ete_mod.xml	Mon Mar 12 12:51:48 2018 -0400
@@ -102,7 +102,9 @@
         </test>
     </tests>
     <help><![CDATA[
-Modify a given input tree using the etetoolkit: http://etetoolkit.org/
+Modify a given input tree using the `ETE Toolkit`_.
+
+.. _ETE Toolkit: http://etetoolkit.org/
     ]]></help>
     <expand macro="citations" />
 </tool>
--- a/ete_species_tree_generator.xml	Tue Nov 07 11:45:13 2017 -0500
+++ b/ete_species_tree_generator.xml	Mon Mar 12 12:51:48 2018 -0400
@@ -8,8 +8,7 @@
         <!-- Anything other than zero is an error -->
         <exit_code range="1:" />
     </stdio>
-    <command>
-    <![CDATA[
+    <command><![CDATA[
 python '$__tool_directory__/ete_species_tree_generator.py'
 -s '$speciesFile'
 -d $database
@@ -20,8 +19,7 @@
     -f ${output_format.format_selector}
 #end if
 -t $output_format.treebest
-    ]]>
-    </command>
+    ]]></command>
     <inputs>
         <param name="speciesFile" type="data" format="txt" label="Species file" help="List with one species per line" />
         <param name="database" type="data" format="sqlite" label="(ETE3) Taxonomy Database" help="The sqlite formatted Taxonomy used by ETE3 (which is derived from NCBI taxonomy)" />
@@ -64,8 +62,7 @@
             </output>
         </test>
     </tests>
-    <help>
-    <![CDATA[
+    <help><![CDATA[
 Generate a species tree from a list of species using the `ETE Toolkit`_.
 
 .. _ETE Toolkit: http://etetoolkit.org/
@@ -87,7 +84,6 @@
 9       leaf names                                      ((D,F),(B,H));
 100     topology only                                   ((,),(,));
 ======= ============================================= ========================================================================================
-    ]]>
-    </help>
+    ]]></help>
     <expand macro="citations" />
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/11_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+(((insr_rattusnorvegicus,insr_musmusculus),(insr_homosapiens,insr_pantroglodytes)),insr_susscrofa);
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/12_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+((maob_rattusnorvegicus,maob_musmusculus),((maob_homosapiens,maob_pantroglodytes),(maob_susscrofa,maob_canisfamiliaris)));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/13_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+(((maoa_rattusnorvegicus,maoa_musmusculus),(maoa_homosapiens,maoa_pantroglodytes)),(maoa_susscrofa,maoa_canisfamiliaris));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/14_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+(((brat1_rattusnorvegicus,brat1_musmusculus),(brat1_homosapiens,brat1_pantroglodytes)),(brat1_susscrofa,brat1_canisfamiliaris));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/21_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+((canisfamiliaris,insr_susscrofa),((insr_rattusnorvegicus,insr_musmusculus),(insr_homosapiens,insr_pantroglodytes)));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/22_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+(((pantroglodytes,homosapiens),(maob_rattusnorvegicus,maob_musmusculus)),(canisfamiliaris,susscrofa));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/23_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+(((rattusnorvegicus,musmusculus),(maob_homosapiens,maob_pantroglodytes)),(maob_susscrofa,maob_canisfamiliaris));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/24_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+(((maoa_rattusnorvegicus,maoa_musmusculus),(maoa_homosapiens,maoa_pantroglodytes)),(maoa_susscrofa,maoa_canisfamiliaris));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/25_genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+(((brat1_rattusnorvegicus,brat1_musmusculus),(brat1_homosapiens,brat1_pantroglodytes)),(brat1_susscrofa,brat1_canisfamiliaris));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genetree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,45 @@
+((((insr_rattusnorvegicus:0.028171[&&NHX:S=rtn],
+insr_musmusculus:0.02455[&&NHX:S=mms]
+):0.088469[&&NHX:D=N:S=39107:T=31:B=100],
+(insr_homosapiens:0.001221[&&NHX:S=hms],
+insr_pantroglodytes:0.005279[&&NHX:S=ptr]
+):0.043136[&&NHX:D=N:S=207598:T=31:B=100]
+):0.014125[&&NHX:D=N:S=314146:T=5:B=13],
+insr_susscrofa:0.051479[&&NHX:E=$-canisfamiliaris:S=ssf]
+):6.45887[&&NHX:D=N:S=root:T=5:B=13],
+((((maob_rattusnorvegicus:0.032474[&&NHX:S=rtn],
+maob_musmusculus:0.026249[&&NHX:S=mms]
+):0.066927[&&NHX:D=N:E=$-207598-314145:S=39107:T=31:B=100],
+((maob_homosapiens:0.003734[&&NHX:S=hms],
+maob_pantroglodytes:0.003601[&&NHX:S=ptr]
+):0.03495[&&NHX:D=N:E=$-39107:S=207598:T=31:B=100],
+(maob_susscrofa:0.074597[&&NHX:S=ssf],
+maob_canisfamiliaris:0.041979[&&NHX:S=cfs]
+):0.016661[&&NHX:D=N:S=314145:T=17:B=67]
+):0.030385[&&NHX:D=N:S=root:T=17:B=66]
+):0.144506[&&NHX:D=Y:SIS=0:DCS=0.0000:DD=Y:S=root:T=27:B=99],
+(((maoa_rattusnorvegicus:0.02415[&&NHX:S=rtn],
+maoa_musmusculus:0.020631[&&NHX:S=mms]
+):0.113773[&&NHX:D=N:S=39107:T=31:B=100],
+(maoa_homosapiens:0.005275[&&NHX:S=hms],
+maoa_pantroglodytes:0.003923[&&NHX:S=ptr]
+):0.053818[&&NHX:D=N:S=207598:T=31:B=100]
+):0.029482[&&NHX:D=N:S=314146:T=5:B=49],
+(maoa_susscrofa:0.080747[&&NHX:S=ssf],
+maoa_canisfamiliaris:0.074906[&&NHX:S=cfs]
+):0.010572[&&NHX:D=N:S=314145:T=23:B=100]
+):0.227145[&&NHX:D=N:S=root:T=5:B=49]
+):4.96729[&&NHX:D=Y:SIS=100:DCS=1.0000:S=root:T=31:B=100],
+(((brat1_rattusnorvegicus:0.027406[&&NHX:S=rtn],
+brat1_musmusculus:0.035144[&&NHX:S=mms]
+):0.174589[&&NHX:D=N:S=39107:T=31:B=100],
+(brat1_homosapiens:0.003983[&&NHX:S=hms],
+brat1_pantroglodytes:0.009203[&&NHX:S=ptr]
+):0.096543[&&NHX:D=N:S=207598:T=31:B=100]
+):0.034504[&&NHX:D=N:S=314146:T=5:B=21],
+(brat1_susscrofa:0.106917[&&NHX:S=ssf],
+brat1_canisfamiliaris:0.098716[&&NHX:S=cfs]
+):0.029758[&&NHX:D=N:S=314145:T=31:B=100]
+):5.24228[&&NHX:D=N:S=root:T=5:B=21]
+):1.10311[&&NHX:D=Y:SIS=100:DCS=1.0000:S=root:T=29:B=100]
+)[&&NHX:D=Y:SIS=83:DCS=0.8333:S=root:B=0];
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/speciestree.nhx	Mon Mar 12 12:51:48 2018 -0400
@@ -0,0 +1,1 @@
+(((pantroglodytes,homosapiens),(rattusnorvegicus,musmusculus)),(canisfamiliaris,susscrofa));