Previous changeset 4:87b6de3ef63e (2018-03-22) Next changeset 6:f1eca1158f21 (2018-10-10) |
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit b97aee603b9acf29981719160e963a1efe2946d0 |
added:
ete_homology_classifier.py ete_homology_classifier.xml test-data/11_homology.csv test-data/11_homology.tabular |
b |
diff -r 87b6de3ef63e -r 817031b8486d ete_homology_classifier.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ete_homology_classifier.py Thu May 10 06:15:17 2018 -0400 |
[ |
"""Classify and filter the pairwise homologies found in a single GeneTree.

Leaf names are expected in ``gene_species`` form. Every pair of leaves is
labelled as ``paralogs`` (same species) or as ``<x>-to-<y>``, where ``<x>`` and
``<y>`` are ``one`` or ``many`` depending on how many leaves of the tree belong
to each of the two species.
"""
from __future__ import print_function

import optparse

from ete3 import PhyloTree

# Fixed reporting order: iterating a plain dict would make the order of the
# output rows depend on the interpreter version (Python 2 / < 3.7).
HOMOLOGY_TYPES = (
    'one-to-one',
    'one-to-many',
    'many-to-one',
    'many-to-many',
    'paralogs',
)

OUTPUT_FORMATS = ('tabular', 'csv')


def _gene_id(leaf_name):
    """Return the ``gene_species`` part of a leaf name, dropping a ``prefix:``."""
    return leaf_name.split(":")[1] if ":" in leaf_name else leaf_name


def _species_of(gene_id):
    """Return the species field (second ``_``-separated token) of a gene id."""
    return gene_id.split("_")[1]


def _classify_homologies(leaves_list):
    """Group every unordered pair of leaves by homology type.

    :param leaves_list: leaf names whose gene id contains an underscore
    :returns: dict mapping each entry of ``HOMOLOGY_TYPES`` to a list of
        ``(gene_id_1, gene_id_2)`` tuples, in leaf order
    """
    # Normalise once so that the per-species counts and the pair
    # classification below are always computed from the same identifiers.
    gene_ids = [_gene_id(leaf) for leaf in leaves_list]

    # "one" the first time a species is seen, "many" from the second time on.
    species_dict = {}
    for gene_id in gene_ids:
        species = _species_of(gene_id)
        species_dict[species] = "many" if species in species_dict else "one"

    homologies = {homology_type: [] for homology_type in HOMOLOGY_TYPES}
    for i, id1 in enumerate(gene_ids):
        species1 = _species_of(id1)
        for id2 in gene_ids[i + 1:]:
            species2 = _species_of(id2)
            if species1 == species2:
                homology_type = 'paralogs'
            else:
                homology_type = species_dict[species1] + "-to-" + species_dict[species2]
            homologies[homology_type].append((id1, id2))
    return homologies


def main():
    """Parse the command line, classify the GeneTree and print the result.

    ``tabular`` output prints one ``gene1<TAB>gene2<TAB>type`` row for every
    pair whose homology type is listed in ``--filters``. ``csv`` output prints
    the comma-separated leaf names on a single line, but only when every
    homology type that occurs in the tree is listed in ``--filters``.
    """
    # The usage line only advertises options that the parser really defines.
    usage = "usage: %prog --genetree <genetree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    # An unknown format used to produce an empty output silently; reject it.
    parser.add_option('--out_format', type='choice', choices=OUTPUT_FORMATS, default='tabular', help='Choose output format')
    parser.add_option('--filters', default='', help='Filter families')

    options, _ = parser.parse_args()

    if options.genetree is None:
        parser.error("--genetree option must be specified, GeneTree in nhx format")

    # reads single gene tree
    genetree = PhyloTree(options.genetree)

    # Genetree nodes are required to be in gene_species format; the check is
    # made on the normalised id because that is what gets split on "_" later.
    leaves_list = [leaf for leaf in genetree.get_leaf_names() if '_' in _gene_id(leaf)]

    homologies = _classify_homologies(leaves_list)
    filters = set(options.filters.split(","))

    if options.out_format == 'tabular':
        for homology_type in HOMOLOGY_TYPES:
            # only homology types selected in the filter are reported
            if homology_type in filters:
                for gene1, gene2 in homologies[homology_type]:
                    print("%s\t%s\t%s" % (gene1, gene2, homology_type))
    elif options.out_format == 'csv':
        # the family is printed only if no homology type outside the filter
        # occurs in the tree
        print_family = all(
            homology_type in filters
            for homology_type in HOMOLOGY_TYPES
            if homologies[homology_type]
        )
        if print_family:
            print(','.join(leaves_list))


if __name__ == "__main__":
    main()
b |
diff -r 87b6de3ef63e -r 817031b8486d ete_homology_classifier.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ete_homology_classifier.xml Thu May 10 06:15:17 2018 -0400 |
[ |
<tool id="ete_homology_classifier" name="Homology Classifier and Filter" version="@VERSION@">
    <description>from a genetree utilising the ETE Toolkit</description>
    <macros>
        <import>ete_macros.xml</import>
        <!-- Checkbox list of homology types, shared by both output formats;
             only the label and help text differ between the two uses. -->
        <xml name="homologies_macro" token_label="" token_help="">
            <param name="homologies" type="select" multiple="true" optional="false" display="checkboxes" label="@LABEL@" help="@HELP@">
                <option value="one-to-one" selected="true">one-to-one</option>
                <option value="one-to-many" selected="true">one-to-many</option>
                <option value="many-to-one" selected="true">many-to-one</option>
                <option value="many-to-many" selected="true">many-to-many</option>
                <option value="paralogs" selected="true">Paralogs</option>
            </param>
        </xml>
    </macros>
    <expand macro="requirements" />
    <!-- Every substituted value, including the filter list and the output
         path, is single-quoted so that the shell receives each as one word. -->
    <command detect_errors="exit_code">
    <![CDATA[
python '$__tool_directory__/ete_homology_classifier.py'
--genetree '$genetreeFile'
--out_format '$format_type.out_format'
#if $format_type.homologies:
    --filters '$format_type.homologies'
#end if
> '$homology'
    ]]>
    </command>
    <inputs>
        <param name="genetreeFile" type="data" format="nhx" label="GeneTree file" help="GeneTree in nhx format, where nodes are in form of geneid_species" />
        <conditional name="format_type">
            <param name="out_format" type="select" label="Output format">
                <option value="tabular" selected="true">Tabular</option>
                <option value="csv">One-line CSV</option>
            </param>
            <when value="tabular">
                <expand macro="homologies_macro" label="Pair types to keep"/>
            </when>
            <when value="csv">
                <expand macro="homologies_macro" label="Allowed homology types for the GeneTree" help="If the GeneTree contains any other homology type, then the output will be empty" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- The datatype follows the selected output format. -->
        <data format="tabular" name="homology" label="${tool.name} on ${on_string}">
            <change_format>
                <when input="format_type.out_format" value="csv" format="csv" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="genetreeFile" ftype="nhx" value="genetree.nhx" />
            <param name="out_format" value="tabular"/>
            <param name="homologies" value="one-to-one,one-to-many,many-to-one,many-to-many,paralogs" />
            <output name="homology" file="11_homology.tabular" />
        </test>
        <test>
            <param name="genetreeFile" ftype="nhx" value="11_genetree.nhx" />
            <param name="out_format" value="csv"/>
            <output name="homology" file="11_homology.csv" />
        </test>
    </tests>
    <help>
    <![CDATA[
Classify and filter Homology from single GeneTree by utilising the `ETE Toolkit`_.

.. _ETE Toolkit: http://etetoolkit.org/

**Output format:**

*tabular*:

====================== ======================== ============
 gene_1                gene_2                   homology
====================== ======================== ============
insr_rattusnorvegicus  insr_musmusculus         one-to-one
insr_rattusnorvegicus  insr_homosapiens         one-to-one
insr_rattusnorvegicus  insr_pantroglodytes      one-to-one
insr_rattusnorvegicus  insr_susscrofa           one-to-one
insr_musmusculus       insr_homosapiens         one-to-one
insr_musmusculus       insr_pantroglodytes      one-to-one
insr_musmusculus       insr_susscrofa           one-to-one
insr_homosapiens       insr_pantroglodytes      one-to-one
insr_homosapiens       insr_susscrofa           one-to-one
insr_pantroglodytes    insr_susscrofa           one-to-one
====================== ======================== ============

\

*One-line CSV*::

    insr_rattusnorvegicus,insr_musmusculus,insr_homosapiens,insr_pantroglodytes,insr_susscrofa
    ]]>
    </help>
    <expand macro="citations" />
</tool>
b |
diff -r 87b6de3ef63e -r 817031b8486d test-data/11_homology.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/11_homology.csv Thu May 10 06:15:17 2018 -0400 |
b |
@@ -0,0 +1,1 @@ +insr_rattusnorvegicus,insr_musmusculus,insr_homosapiens,insr_pantroglodytes,insr_susscrofa |
b |
diff -r 87b6de3ef63e -r 817031b8486d test-data/11_homology.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/11_homology.tabular Thu May 10 06:15:17 2018 -0400 |
b |
b'@@ -0,0 +1,253 @@\n+insr_rattusnorvegicus\tinsr_musmusculus\tmany-to-many\n+insr_rattusnorvegicus\tinsr_homosapiens\tmany-to-many\n+insr_rattusnorvegicus\tinsr_pantroglodytes\tmany-to-many\n+insr_rattusnorvegicus\tinsr_susscrofa\tmany-to-many\n+insr_rattusnorvegicus\tmaob_musmusculus\tmany-to-many\n+insr_rattusnorvegicus\tmaob_homosapiens\tmany-to-many\n+insr_rattusnorvegicus\tmaob_pantroglodytes\tmany-to-many\n+insr_rattusnorvegicus\tmaob_susscrofa\tmany-to-many\n+insr_rattusnorvegicus\tmaob_canisfamiliaris\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_musmusculus\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_homosapiens\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_pantroglodytes\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_susscrofa\tmany-to-many\n+insr_rattusnorvegicus\tmaoa_canisfamiliaris\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_musmusculus\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_homosapiens\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_pantroglodytes\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_susscrofa\tmany-to-many\n+insr_rattusnorvegicus\tbrat1_canisfamiliaris\tmany-to-many\n+insr_musmusculus\tinsr_homosapiens\tmany-to-many\n+insr_musmusculus\tinsr_pantroglodytes\tmany-to-many\n+insr_musmusculus\tinsr_susscrofa\tmany-to-many\n+insr_musmusculus\tmaob_rattusnorvegicus\tmany-to-many\n+insr_musmusculus\tmaob_homosapiens\tmany-to-many\n+insr_musmusculus\tmaob_pantroglodytes\tmany-to-many\n+insr_musmusculus\tmaob_susscrofa\tmany-to-many\n+insr_musmusculus\tmaob_canisfamiliaris\tmany-to-many\n+insr_musmusculus\tmaoa_rattusnorvegicus\tmany-to-many\n+insr_musmusculus\tmaoa_homosapiens\tmany-to-many\n+insr_musmusculus\tmaoa_pantroglodytes\tmany-to-many\n+insr_musmusculus\tmaoa_susscrofa\tmany-to-many\n+insr_musmusculus\tmaoa_canisfamiliaris\tmany-to-many\n+insr_musmusculus\tbrat1_rattusnorvegicus\tmany-to-many\n+insr_musmusculus\tbrat1_homosapiens\tmany-to-many\n+insr_musmusculus\tbrat1_pantroglodytes\tmany-to-many\n+insr_musmusculus\tbrat1_susscrofa\tmany-to-ma
ny\n+insr_musmusculus\tbrat1_canisfamiliaris\tmany-to-many\n+insr_homosapiens\tinsr_pantroglodytes\tmany-to-many\n+insr_homosapiens\tinsr_susscrofa\tmany-to-many\n+insr_homosapiens\tmaob_rattusnorvegicus\tmany-to-many\n+insr_homosapiens\tmaob_musmusculus\tmany-to-many\n+insr_homosapiens\tmaob_pantroglodytes\tmany-to-many\n+insr_homosapiens\tmaob_susscrofa\tmany-to-many\n+insr_homosapiens\tmaob_canisfamiliaris\tmany-to-many\n+insr_homosapiens\tmaoa_rattusnorvegicus\tmany-to-many\n+insr_homosapiens\tmaoa_musmusculus\tmany-to-many\n+insr_homosapiens\tmaoa_pantroglodytes\tmany-to-many\n+insr_homosapiens\tmaoa_susscrofa\tmany-to-many\n+insr_homosapiens\tmaoa_canisfamiliaris\tmany-to-many\n+insr_homosapiens\tbrat1_rattusnorvegicus\tmany-to-many\n+insr_homosapiens\tbrat1_musmusculus\tmany-to-many\n+insr_homosapiens\tbrat1_pantroglodytes\tmany-to-many\n+insr_homosapiens\tbrat1_susscrofa\tmany-to-many\n+insr_homosapiens\tbrat1_canisfamiliaris\tmany-to-many\n+insr_pantroglodytes\tinsr_susscrofa\tmany-to-many\n+insr_pantroglodytes\tmaob_rattusnorvegicus\tmany-to-many\n+insr_pantroglodytes\tmaob_musmusculus\tmany-to-many\n+insr_pantroglodytes\tmaob_homosapiens\tmany-to-many\n+insr_pantroglodytes\tmaob_susscrofa\tmany-to-many\n+insr_pantroglodytes\tmaob_canisfamiliaris\tmany-to-many\n+insr_pantroglodytes\tmaoa_rattusnorvegicus\tmany-to-many\n+insr_pantroglodytes\tmaoa_musmusculus\tmany-to-many\n+insr_pantroglodytes\tmaoa_homosapiens\tmany-to-many\n+insr_pantroglodytes\tmaoa_susscrofa\tmany-to-many\n+insr_pantroglodytes\tmaoa_canisfamiliaris\tmany-to-many\n+insr_pantroglodytes\tbrat1_rattusnorvegicus\tmany-to-many\n+insr_pantroglodytes\tbrat1_musmusculus\tmany-to-many\n+insr_pantroglodytes\tbrat1_homosapiens\tmany-to-many\n+insr_pantroglodytes\tbrat1_susscrofa\tmany-to-many\n+insr_pantroglodytes\tbrat1_canisfamiliaris\tmany-to-many\n+insr_susscrofa\tmaob_rattusnorvegicus\tmany-to-many\n+insr_susscrofa\tmaob_musmusculus\tmany-to-many\n+insr_susscrofa\tmaob_homosapiens\tmany-to-man
y\n+insr_susscrofa\tmaob_pantroglodytes\tmany-to-many\n+insr_susscrofa\tmaob_canisfamiliaris\tmany-to-many\n+insr_susscrofa\tmaoa_rattusnorvegicus\tmany-to-many\n+insr_susscrofa\tmaoa_musmusculus\tmany-to-many\n+insr_susscrofa\tmaoa_h'..b's\tmaoa_canisfamiliaris\tmany-to-many\n+maoa_musmusculus\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_musmusculus\tbrat1_homosapiens\tmany-to-many\n+maoa_musmusculus\tbrat1_pantroglodytes\tmany-to-many\n+maoa_musmusculus\tbrat1_susscrofa\tmany-to-many\n+maoa_musmusculus\tbrat1_canisfamiliaris\tmany-to-many\n+maoa_homosapiens\tmaoa_pantroglodytes\tmany-to-many\n+maoa_homosapiens\tmaoa_susscrofa\tmany-to-many\n+maoa_homosapiens\tmaoa_canisfamiliaris\tmany-to-many\n+maoa_homosapiens\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_homosapiens\tbrat1_musmusculus\tmany-to-many\n+maoa_homosapiens\tbrat1_pantroglodytes\tmany-to-many\n+maoa_homosapiens\tbrat1_susscrofa\tmany-to-many\n+maoa_homosapiens\tbrat1_canisfamiliaris\tmany-to-many\n+maoa_pantroglodytes\tmaoa_susscrofa\tmany-to-many\n+maoa_pantroglodytes\tmaoa_canisfamiliaris\tmany-to-many\n+maoa_pantroglodytes\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_pantroglodytes\tbrat1_musmusculus\tmany-to-many\n+maoa_pantroglodytes\tbrat1_homosapiens\tmany-to-many\n+maoa_pantroglodytes\tbrat1_susscrofa\tmany-to-many\n+maoa_pantroglodytes\tbrat1_canisfamiliaris\tmany-to-many\n+maoa_susscrofa\tmaoa_canisfamiliaris\tmany-to-many\n+maoa_susscrofa\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_susscrofa\tbrat1_musmusculus\tmany-to-many\n+maoa_susscrofa\tbrat1_homosapiens\tmany-to-many\n+maoa_susscrofa\tbrat1_pantroglodytes\tmany-to-many\n+maoa_susscrofa\tbrat1_canisfamiliaris\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_rattusnorvegicus\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_musmusculus\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_homosapiens\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_pantroglodytes\tmany-to-many\n+maoa_canisfamiliaris\tbrat1_susscrofa\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_
musmusculus\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_homosapiens\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_pantroglodytes\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_susscrofa\tmany-to-many\n+brat1_rattusnorvegicus\tbrat1_canisfamiliaris\tmany-to-many\n+brat1_musmusculus\tbrat1_homosapiens\tmany-to-many\n+brat1_musmusculus\tbrat1_pantroglodytes\tmany-to-many\n+brat1_musmusculus\tbrat1_susscrofa\tmany-to-many\n+brat1_musmusculus\tbrat1_canisfamiliaris\tmany-to-many\n+brat1_homosapiens\tbrat1_pantroglodytes\tmany-to-many\n+brat1_homosapiens\tbrat1_susscrofa\tmany-to-many\n+brat1_homosapiens\tbrat1_canisfamiliaris\tmany-to-many\n+brat1_pantroglodytes\tbrat1_susscrofa\tmany-to-many\n+brat1_pantroglodytes\tbrat1_canisfamiliaris\tmany-to-many\n+brat1_susscrofa\tbrat1_canisfamiliaris\tmany-to-many\n+insr_rattusnorvegicus\tmaob_rattusnorvegicus\tparalogs\n+insr_rattusnorvegicus\tmaoa_rattusnorvegicus\tparalogs\n+insr_rattusnorvegicus\tbrat1_rattusnorvegicus\tparalogs\n+insr_musmusculus\tmaob_musmusculus\tparalogs\n+insr_musmusculus\tmaoa_musmusculus\tparalogs\n+insr_musmusculus\tbrat1_musmusculus\tparalogs\n+insr_homosapiens\tmaob_homosapiens\tparalogs\n+insr_homosapiens\tmaoa_homosapiens\tparalogs\n+insr_homosapiens\tbrat1_homosapiens\tparalogs\n+insr_pantroglodytes\tmaob_pantroglodytes\tparalogs\n+insr_pantroglodytes\tmaoa_pantroglodytes\tparalogs\n+insr_pantroglodytes\tbrat1_pantroglodytes\tparalogs\n+insr_susscrofa\tmaob_susscrofa\tparalogs\n+insr_susscrofa\tmaoa_susscrofa\tparalogs\n+insr_susscrofa\tbrat1_susscrofa\tparalogs\n+maob_rattusnorvegicus\tmaoa_rattusnorvegicus\tparalogs\n+maob_rattusnorvegicus\tbrat1_rattusnorvegicus\tparalogs\n+maob_musmusculus\tmaoa_musmusculus\tparalogs\n+maob_musmusculus\tbrat1_musmusculus\tparalogs\n+maob_homosapiens\tmaoa_homosapiens\tparalogs\n+maob_homosapiens\tbrat1_homosapiens\tparalogs\n+maob_pantroglodytes\tmaoa_pantroglodytes\tparalogs\n+maob_pantroglodytes\tbrat1_pantroglodytes\tparalogs\n+maob_susscrofa\tmaoa_susscr
ofa\tparalogs\n+maob_susscrofa\tbrat1_susscrofa\tparalogs\n+maob_canisfamiliaris\tmaoa_canisfamiliaris\tparalogs\n+maob_canisfamiliaris\tbrat1_canisfamiliaris\tparalogs\n+maoa_rattusnorvegicus\tbrat1_rattusnorvegicus\tparalogs\n+maoa_musmusculus\tbrat1_musmusculus\tparalogs\n+maoa_homosapiens\tbrat1_homosapiens\tparalogs\n+maoa_pantroglodytes\tbrat1_pantroglodytes\tparalogs\n+maoa_susscrofa\tbrat1_susscrofa\tparalogs\n+maoa_canisfamiliaris\tbrat1_canisfamiliaris\tparalogs\n' |