Repository 'ete'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/ete

Changeset 5:817031b8486d (2018-05-10)
Previous changeset 4:87b6de3ef63e (2018-03-22) Next changeset 6:f1eca1158f21 (2018-10-10)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0
added:
ete_homology_classifier.py
ete_homology_classifier.xml
test-data/11_homology.csv
test-data/11_homology.tabular
b
diff -r 87b6de3ef63e -r 817031b8486d ete_homology_classifier.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ete_homology_classifier.py Thu May 10 06:15:17 2018 -0400
[
@@ -0,0 +1,78 @@
+from __future__ import print_function
+
+import optparse
+
+from ete3 import PhyloTree
+
+
+def main():
+    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option('--genetree', help='GeneTree in nhx format')
+    parser.add_option('--out_format', type='string', default='tabular', help='Choose output format')
+    parser.add_option('--filters', default='', help='Filter families')
+
+    options, args = parser.parse_args()
+
+    if options.genetree is None:
+        parser.error("--genetree option must be specified, GeneTree in nhx format")
+
+    # reads single gene tree
+    genetree = PhyloTree(options.genetree)
+
+    leaves_list = genetree.get_leaf_names()
+    # Genetree nodes are required to be in gene_species format
+    leaves_list = [_ for _ in leaves_list if '_' in _]
+
+    species_list = [_.split("_")[1] for _ in leaves_list]
+
+    species_dict = {}
+    for species in species_list:
+        count = "one"
+        if species in species_dict:
+            count = "many"
+        species_dict[species] = count
+
+    homologies = {
+        'one-to-one': [],
+        'one-to-many': [],
+        'many-to-one': [],
+        'many-to-many': [],
+        'paralogs': []
+    }
+
+    # stores relevant homology types in dict
+    for i, leaf1 in enumerate(leaves_list):
+        for leaf2 in leaves_list[i + 1:]:
+            id1 = leaf1.split(":")[1] if ":" in leaf1 else leaf1
+            id2 = leaf2.split(":")[1] if ":" in leaf2 else leaf2
+            species1 = id1.split("_")[1]
+            species2 = id2.split("_")[1]
+            if species1 == species2:
+                homology_type = 'paralogs'
+            else:
+                homology_type = species_dict[species1] + "-to-" + species_dict[species2]
+            homologies[homology_type].append((id1, id2))
+
+    options.filters = options.filters.split(",")
+
+    if options.out_format == 'tabular':
+        for homology_type, homologs_list in homologies.items():
+            # checks if homology type is in filter
+            if homology_type in options.filters:
+                for (gene1, gene2) in homologs_list:
+                    print("%s\t%s\t%s" % (gene1, gene2, homology_type))
+    elif options.out_format == 'csv':
+        print_family = True
+        for homology_type, homologs_list in homologies.items():
+            if homologs_list and homology_type not in options.filters:
+                print_family = False
+                break
+
+        # prints family if homology type is not found in filter
+        if print_family:
+            print(','.join(leaves_list))
+
+
+if __name__ == "__main__":
+    main()
b
diff -r 87b6de3ef63e -r 817031b8486d ete_homology_classifier.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ete_homology_classifier.xml Thu May 10 06:15:17 2018 -0400
[
@@ -0,0 +1,95 @@
+<tool id="ete_homology_classifier" name="Homology Classifier and Filter" version="@VERSION@">
+    <description>from a genetree utilising the ETE Toolkit</description>
+    <macros>
+        <import>ete_macros.xml</import>
+        <!-- Multi-select of homology types, reused by both output formats with a different label/help -->
+        <xml name="homologies_macro" token_label="" token_help="">
+            <param name="homologies" type="select" multiple="true" optional="false" display="checkboxes" label="@LABEL@" help="@HELP@">
+                <option value="one-to-one" selected="true">one-to-one</option>
+                <option value="one-to-many" selected="true">one-to-many</option>
+                <option value="many-to-one" selected="true">many-to-one</option>
+                <option value="many-to-many" selected="true">many-to-many</option>
+                <option value="paralogs" selected="true">Paralogs</option>
+            </param>
+        </xml>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Every substituted value is single-quoted so the shell receives it as one literal word -->
+    <command detect_errors="exit_code">
+    <![CDATA[
+python '$__tool_directory__/ete_homology_classifier.py'
+--genetree '$genetreeFile'
+--out_format '$format_type.out_format'
+#if $format_type.homologies:
+    --filters '$format_type.homologies'
+#end if
+> '$homology'
+    ]]>
+    </command>
+    <inputs>
+        <param name="genetreeFile" type="data" format="nhx" label="GeneTree file" help="GeneTree in nhx format, where nodes are in form of geneid_species" />
+        <conditional name="format_type">
+            <param name="out_format" type="select" label="Output format">
+                <option value="tabular" selected="true">Tabular</option>
+                <option value="csv">One-line CSV</option>
+            </param>
+            <when value="tabular">
+                <expand macro="homologies_macro" label="Pair types to keep"/>
+            </when>
+            <when value="csv">
+                <expand macro="homologies_macro" label="Allowed homology types for the GeneTree" help="If the GeneTree contains any other homology type, then the output will be empty" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Datatype follows the selected output format: tabular by default, csv when chosen -->
+        <data format="tabular" name="homology" label="${tool.name} on ${on_string}">
+            <change_format>
+                <when input="format_type.out_format" value="csv" format="csv" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="genetreeFile" ftype="nhx" value="genetree.nhx" />
+            <param name="out_format" value="tabular"/>
+            <param name="homologies" value="one-to-one,one-to-many,many-to-one,many-to-many,paralogs" />
+            <output name="homology" file="11_homology.tabular" />
+        </test>
+        <test>
+            <param name="genetreeFile" ftype="nhx" value="11_genetree.nhx" />
+            <param name="out_format" value="csv"/>
+            <output name="homology" file="11_homology.csv" />
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+Classify and filter Homology from single GeneTree by utilising the `ETE Toolkit`_.
+
+.. _ETE Toolkit: http://etetoolkit.org/
+
+**Output format:**
+
+*tabular*:
+
+======================  ========================  ============
+       gene_1                  gene_2               homology
+======================  ========================  ============
+insr_rattusnorvegicus   insr_musmusculus          one-to-one
+insr_rattusnorvegicus   insr_homosapiens          one-to-one
+insr_rattusnorvegicus   insr_pantroglodytes       one-to-one
+insr_rattusnorvegicus   insr_susscrofa            one-to-one
+insr_musmusculus        insr_homosapiens          one-to-one
+insr_musmusculus        insr_pantroglodytes       one-to-one
+insr_musmusculus        insr_susscrofa            one-to-one
+insr_homosapiens        insr_pantroglodytes       one-to-one
+insr_homosapiens        insr_susscrofa            one-to-one
+insr_pantroglodytes     insr_susscrofa            one-to-one
+======================  ========================  ============
+
+\
+
+*One-line CSV*::
+
+    insr_rattusnorvegicus,insr_musmusculus,insr_homosapiens,insr_pantroglodytes,insr_susscrofa
+    ]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 87b6de3ef63e -r 817031b8486d test-data/11_homology.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/11_homology.csv Thu May 10 06:15:17 2018 -0400
b
@@ -0,0 +1,1 @@
+insr_rattusnorvegicus,insr_musmusculus,insr_homosapiens,insr_pantroglodytes,insr_susscrofa
b
diff -r 87b6de3ef63e -r 817031b8486d test-data/11_homology.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/11_homology.tabular Thu May 10 06:15:17 2018 -0400
b
b'@@ -0,0 +1,253 @@\n+insr_rattusnorvegicus\tinsr_musmusculus\tmany-to-many\n+insr_rattusnorvegicus\tinsr_homosapiens\tmany-to-many\n+insr_rattusnorvegicus\tinsr_pantroglodytes\tmany-to-many\n+insr_rattusnorvegicus\tinsr_susscrofa\tmany-to-many\n+insr_rattusnorvegicus\tmaob_musmusculus\tmany-to-many\n+insr_rattusnorvegicus\tmaob_homosapiens\tmany-to-many\n+insr_rattusnorvegicus\tmaob_pantroglodytes\tmany-to-many\n+insr_rattusnorvegicus\tmaob_susscrofa\tmany-to-many\n+insr_rattusnorvegicus\tmaob_canisfamiliaris\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_musmusculus\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_homosapiens\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_pantroglodytes\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_susscrofa\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_canisfamiliaris\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_musmusculus\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_homosapiens\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_pantroglodytes\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_susscrofa\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_canisfamiliaris\tmany-to-many\n+insr_musmusculus\tinsr_homosapiens\tmany-to-many\n+insr_musmusculus\tinsr_pantroglodytes\tmany-to-many\n+insr_musmusculus\tinsr_susscrofa\tmany-to-many\n+insr_musmusculus\tmaob_rattusnorvegicus\tmany-to-many\n+insr_musmusculus\tmaob_homosapiens\tmany-to-many\n+insr_musmusculus\tmaob_pantroglodytes\tmany-to-many\n+insr_musmusculus\tmaob_susscrofa\tmany-to-many\n+insr_musmusculus\tmaob_canisfamiliaris\tmany-to-many\n+insr_musmusculus\tmaoa_rattusnorvegicus\tmany-to-many\n+insr_musmusculus\tmaoa_homosapiens\tmany-to-many\n+insr_musmusculus\tmaoa_pantroglodytes\tmany-to-many\n+insr_musmusculus\tmaoa_susscrofa\tmany-to-many\n+insr_musmusculus\tmaoa_canisfamiliaris\tmany-to-many\n+insr_musmusculus\tbrat1_rattusnorvegicus\tmany-to-many\n+insr_musmusculus\tbrat1_homosapiens\tmany-to-many\n+insr_musmusculus\tbrat1_pantroglodytes\tmany-to-many\n+insr_musmusculus\tbrat1_susscrofa\tmany-to-ma
ny\n+insr_musmusculus\tbrat1_canisfamiliaris\tmany-to-many\n+insr_homosapiens\tinsr_pantroglodytes\tmany-to-many\n+insr_homosapiens\tinsr_susscrofa\tmany-to-many\n+insr_homosapiens\tmaob_rattusnorvegicus\tmany-to-many\n+insr_homosapiens\tmaob_musmusculus\tmany-to-many\n+insr_homosapiens\tmaob_pantroglodytes\tmany-to-many\n+insr_homosapiens\tmaob_susscrofa\tmany-to-many\n+insr_homosapiens\tmaob_canisfamiliaris\tmany-to-many\n+insr_homosapiens\tmaoa_rattusnorvegicus\tmany-to-many\n+insr_homosapiens\tmaoa_musmusculus\tmany-to-many\n+insr_homosapiens\tmaoa_pantroglodytes\tmany-to-many\n+insr_homosapiens\tmaoa_susscrofa\tmany-to-many\n+insr_homosapiens\tmaoa_canisfamiliaris\tmany-to-many\n+insr_homosapiens\tbrat1_rattusnorvegicus\tmany-to-many\n+insr_homosapiens\tbrat1_musmusculus\tmany-to-many\n+insr_homosapiens\tbrat1_pantroglodytes\tmany-to-many\n+insr_homosapiens\tbrat1_susscrofa\tmany-to-many\n+insr_homosapiens\tbrat1_canisfamiliaris\tmany-to-many\n+insr_pantroglodytes\tinsr_susscrofa\tmany-to-many\n+insr_pantroglodytes\tmaob_rattusnorvegicus\tmany-to-many\n+insr_pantroglodytes\tmaob_musmusculus\tmany-to-many\n+insr_pantroglodytes\tmaob_homosapiens\tmany-to-many\n+insr_pantroglodytes\tmaob_susscrofa\tmany-to-many\n+insr_pantroglodytes\tmaob_canisfamiliaris\tmany-to-many\n+insr_pantroglodytes\tmaoa_rattusnorvegicus\tmany-to-many\n+insr_pantroglodytes\tmaoa_musmusculus\tmany-to-many\n+insr_pantroglodytes\tmaoa_homosapiens\tmany-to-many\n+insr_pantroglodytes\tmaoa_susscrofa\tmany-to-many\n+insr_pantroglodytes\tmaoa_canisfamiliaris\tmany-to-many\n+insr_pantroglodytes\tbrat1_rattusnorvegicus\tmany-to-many\n+insr_pantroglodytes\tbrat1_musmusculus\tmany-to-many\n+insr_pantroglodytes\tbrat1_homosapiens\tmany-to-many\n+insr_pantroglodytes\tbrat1_susscrofa\tmany-to-many\n+insr_pantroglodytes\tbrat1_canisfamiliaris\tmany-to-many\n+insr_susscrofa\tmaob_rattusnorvegicus\tmany-to-many\n+insr_susscrofa\tmaob_musmusculus\tmany-to-many\n+insr_susscrofa\tmaob_homosapiens\tmany-to-man
y\n+insr_susscrofa\tmaob_pantroglodytes\tmany-to-many\n+insr_susscrofa\tmaob_canisfamiliaris\tmany-to-many\n+insr_susscrofa\tmaoa_rattusnorvegicus\tmany-to-many\n+insr_susscrofa\tmaoa_musmusculus\tmany-to-many\n+insr_susscrofa\tmaoa_h'..b's\tmaoa_canisfamiliaris\tmany-to-many\n+maoa_musmusculus\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_musmusculus\tbrat1_homosapiens\tmany-to-many\n+maoa_musmusculus\tbrat1_pantroglodytes\tmany-to-many\n+maoa_musmusculus\tbrat1_susscrofa\tmany-to-many\n+maoa_musmusculus\tbrat1_canisfamiliaris\tmany-to-many\n+maoa_homosapiens\tmaoa_pantroglodytes\tmany-to-many\n+maoa_homosapiens\tmaoa_susscrofa\tmany-to-many\n+maoa_homosapiens\tmaoa_canisfamiliaris\tmany-to-many\n+maoa_homosapiens\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_homosapiens\tbrat1_musmusculus\tmany-to-many\n+maoa_homosapiens\tbrat1_pantroglodytes\tmany-to-many\n+maoa_homosapiens\tbrat1_susscrofa\tmany-to-many\n+maoa_homosapiens\tbrat1_canisfamiliaris\tmany-to-many\n+maoa_pantroglodytes\tmaoa_susscrofa\tmany-to-many\n+maoa_pantroglodytes\tmaoa_canisfamiliaris\tmany-to-many\n+maoa_pantroglodytes\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_pantroglodytes\tbrat1_musmusculus\tmany-to-many\n+maoa_pantroglodytes\tbrat1_homosapiens\tmany-to-many\n+maoa_pantroglodytes\tbrat1_susscrofa\tmany-to-many\n+maoa_pantroglodytes\tbrat1_canisfamiliaris\tmany-to-many\n+maoa_susscrofa\tmaoa_canisfamiliaris\tmany-to-many\n+maoa_susscrofa\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_susscrofa\tbrat1_musmusculus\tmany-to-many\n+maoa_susscrofa\tbrat1_homosapiens\tmany-to-many\n+maoa_susscrofa\tbrat1_pantroglodytes\tmany-to-many\n+maoa_susscrofa\tbrat1_canisfamiliaris\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_musmusculus\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_homosapiens\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_pantroglodytes\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_susscrofa\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_
musmusculus\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_homosapiens\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_pantroglodytes\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_susscrofa\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_canisfamiliaris\tmany-to-many\n+brat1_musmusculus\tbrat1_homosapiens\tmany-to-many\n+brat1_musmusculus\tbrat1_pantroglodytes\tmany-to-many\n+brat1_musmusculus\tbrat1_susscrofa\tmany-to-many\n+brat1_musmusculus\tbrat1_canisfamiliaris\tmany-to-many\n+brat1_homosapiens\tbrat1_pantroglodytes\tmany-to-many\n+brat1_homosapiens\tbrat1_susscrofa\tmany-to-many\n+brat1_homosapiens\tbrat1_canisfamiliaris\tmany-to-many\n+brat1_pantroglodytes\tbrat1_susscrofa\tmany-to-many\n+brat1_pantroglodytes\tbrat1_canisfamiliaris\tmany-to-many\n+brat1_susscrofa\tbrat1_canisfamiliaris\tmany-to-many\n+insr_rattusnorvegicus\tmaob_rattusnorvegicus\tparalogs\n+insr_rattusnorvegicus\tmaoa_rattusnorvegicus\tparalogs\n+insr_rattusnorvegicus\tbrat1_rattusnorvegicus\tparalogs\n+insr_musmusculus\tmaob_musmusculus\tparalogs\n+insr_musmusculus\tmaoa_musmusculus\tparalogs\n+insr_musmusculus\tbrat1_musmusculus\tparalogs\n+insr_homosapiens\tmaob_homosapiens\tparalogs\n+insr_homosapiens\tmaoa_homosapiens\tparalogs\n+insr_homosapiens\tbrat1_homosapiens\tparalogs\n+insr_pantroglodytes\tmaob_pantroglodytes\tparalogs\n+insr_pantroglodytes\tmaoa_pantroglodytes\tparalogs\n+insr_pantroglodytes\tbrat1_pantroglodytes\tparalogs\n+insr_susscrofa\tmaob_susscrofa\tparalogs\n+insr_susscrofa\tmaoa_susscrofa\tparalogs\n+insr_susscrofa\tbrat1_susscrofa\tparalogs\n+maob_rattusnorvegicus\tmaoa_rattusnorvegicus\tparalogs\n+maob_rattusnorvegicus\tbrat1_rattusnorvegicus\tparalogs\n+maob_musmusculus\tmaoa_musmusculus\tparalogs\n+maob_musmusculus\tbrat1_musmusculus\tparalogs\n+maob_homosapiens\tmaoa_homosapiens\tparalogs\n+maob_homosapiens\tbrat1_homosapiens\tparalogs\n+maob_pantroglodytes\tmaoa_pantroglodytes\tparalogs\n+maob_pantroglodytes\tbrat1_pantroglodytes\tparalogs\n+maob_susscrofa\tmaoa_susscr
ofa\tparalogs\n+maob_susscrofa\tbrat1_susscrofa\tparalogs\n+maob_canisfamiliaris\tmaoa_canisfamiliaris\tparalogs\n+maob_canisfamiliaris\tbrat1_canisfamiliaris\tparalogs\n+maoa_rattusnorvegicus\tbrat1_rattusnorvegicus\tparalogs\n+maoa_musmusculus\tbrat1_musmusculus\tparalogs\n+maoa_homosapiens\tbrat1_homosapiens\tparalogs\n+maoa_pantroglodytes\tbrat1_pantroglodytes\tparalogs\n+maoa_susscrofa\tbrat1_susscrofa\tparalogs\n+maoa_canisfamiliaris\tbrat1_canisfamiliaris\tparalogs\n'