# HG changeset patch # User earlhaminst # Date 1520873508 14400 # Node ID 077021c45b969e03643ed460c633cb0fafda7795 # Parent 03c10736e4973b71a7d2e9504ffac449eec29e5a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f diff -r 03c10736e497 -r 077021c45b96 ete_genetree_splitter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ete_genetree_splitter.py Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,56 @@ +from __future__ import print_function + +import optparse + +from ete3 import PhyloTree + + +def main(): + usage = "usage: %prog --genetree --speciestree [options]" + parser = optparse.OptionParser(usage=usage) + parser.add_option('--genetree', help='GeneTree in nhx format') + parser.add_option('--speciestree', help='Species Tree in nhx format') + parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)') + parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene') + parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose') + parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)') + options, args = parser.parse_args() + + if options.genetree is None: + parser.error("--genetree option must be specified, GeneTree in nhx format") + + # reads single gene tree + genetree = PhyloTree(options.genetree) + + # sets species naming function + if options.gene_node == 0: + genetree.set_species_naming_function(parse_sp_name) + + # reconcile species tree with gene tree to help find out gene gain/loss + if options.gainlose: + + if options.speciestree is None: + parser.error("--speciestree option must be specified, species tree in nhx format") + + # reads species tree + speciestree = PhyloTree(options.speciestree, format=options.species_format) + + # Removes '*' from species names that come from a species tree configured for
TreeBest + for leaf in speciestree: + leaf.name = leaf.name.strip('*') + + genetree, events = genetree.reconcile(speciestree) + + # splits tree by duplication events which returns the list of all subtrees resulting from splitting current tree by its duplication nodes. + for cluster_id, node in enumerate(genetree.split_by_dups(), 1): + outfile = str(cluster_id) + '_genetree.nhx' + with open(outfile, 'w') as f: + f.write(node.write(format=options.output_format)) + + +def parse_sp_name(node_name): + return node_name.split("_")[1] + + +if __name__ == "__main__": + main() diff -r 03c10736e497 -r 077021c45b96 ete_genetree_splitter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ete_genetree_splitter.xml Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,123 @@ + + from a genetree using the ETE Toolkit + + ete_macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 03c10736e497 -r 077021c45b96 ete_init_taxdb.xml --- a/ete_init_taxdb.xml Tue Nov 07 11:45:13 2017 -0500 +++ b/ete_init_taxdb.xml Mon Mar 12 12:51:48 2018 -0400 @@ -34,7 +34,9 @@ diff -r 03c10736e497 -r 077021c45b96 ete_species_tree_generator.xml --- a/ete_species_tree_generator.xml Tue Nov 07 11:45:13 2017 -0500 +++ b/ete_species_tree_generator.xml Mon Mar 12 12:51:48 2018 -0400 @@ -8,8 +8,7 @@ - - - + ]]> @@ -64,8 +62,7 @@ - - - + ]]> diff -r 03c10736e497 -r 077021c45b96 test-data/11_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/11_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((insr_rattusnorvegicus,insr_musmusculus),(insr_homosapiens,insr_pantroglodytes)),insr_susscrofa); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/12_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/12_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ 
+((maob_rattusnorvegicus,maob_musmusculus),((maob_homosapiens,maob_pantroglodytes),(maob_susscrofa,maob_canisfamiliaris))); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/13_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/13_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((maoa_rattusnorvegicus,maoa_musmusculus),(maoa_homosapiens,maoa_pantroglodytes)),(maoa_susscrofa,maoa_canisfamiliaris)); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/14_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/14_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((brat1_rattusnorvegicus,brat1_musmusculus),(brat1_homosapiens,brat1_pantroglodytes)),(brat1_susscrofa,brat1_canisfamiliaris)); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/21_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/21_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +((canisfamiliaris,insr_susscrofa),((insr_rattusnorvegicus,insr_musmusculus),(insr_homosapiens,insr_pantroglodytes))); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/22_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/22_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((pantroglodytes,homosapiens),(maob_rattusnorvegicus,maob_musmusculus)),(canisfamiliaris,susscrofa)); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/23_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/23_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((rattusnorvegicus,musmusculus),(maob_homosapiens,maob_pantroglodytes)),(maob_susscrofa,maob_canisfamiliaris)); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/24_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/24_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 
+1,1 @@ +(((maoa_rattusnorvegicus,maoa_musmusculus),(maoa_homosapiens,maoa_pantroglodytes)),(maoa_susscrofa,maoa_canisfamiliaris)); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/25_genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/25_genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((brat1_rattusnorvegicus,brat1_musmusculus),(brat1_homosapiens,brat1_pantroglodytes)),(brat1_susscrofa,brat1_canisfamiliaris)); \ No newline at end of file diff -r 03c10736e497 -r 077021c45b96 test-data/genetree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genetree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,45 @@ +((((insr_rattusnorvegicus:0.028171[&&NHX:S=rtn], +insr_musmusculus:0.02455[&&NHX:S=mms] +):0.088469[&&NHX:D=N:S=39107:T=31:B=100], +(insr_homosapiens:0.001221[&&NHX:S=hms], +insr_pantroglodytes:0.005279[&&NHX:S=ptr] +):0.043136[&&NHX:D=N:S=207598:T=31:B=100] +):0.014125[&&NHX:D=N:S=314146:T=5:B=13], +insr_susscrofa:0.051479[&&NHX:E=$-canisfamiliaris:S=ssf] +):6.45887[&&NHX:D=N:S=root:T=5:B=13], +((((maob_rattusnorvegicus:0.032474[&&NHX:S=rtn], +maob_musmusculus:0.026249[&&NHX:S=mms] +):0.066927[&&NHX:D=N:E=$-207598-314145:S=39107:T=31:B=100], +((maob_homosapiens:0.003734[&&NHX:S=hms], +maob_pantroglodytes:0.003601[&&NHX:S=ptr] +):0.03495[&&NHX:D=N:E=$-39107:S=207598:T=31:B=100], +(maob_susscrofa:0.074597[&&NHX:S=ssf], +maob_canisfamiliaris:0.041979[&&NHX:S=cfs] +):0.016661[&&NHX:D=N:S=314145:T=17:B=67] +):0.030385[&&NHX:D=N:S=root:T=17:B=66] +):0.144506[&&NHX:D=Y:SIS=0:DCS=0.0000:DD=Y:S=root:T=27:B=99], +(((maoa_rattusnorvegicus:0.02415[&&NHX:S=rtn], +maoa_musmusculus:0.020631[&&NHX:S=mms] +):0.113773[&&NHX:D=N:S=39107:T=31:B=100], +(maoa_homosapiens:0.005275[&&NHX:S=hms], +maoa_pantroglodytes:0.003923[&&NHX:S=ptr] +):0.053818[&&NHX:D=N:S=207598:T=31:B=100] +):0.029482[&&NHX:D=N:S=314146:T=5:B=49], +(maoa_susscrofa:0.080747[&&NHX:S=ssf], +maoa_canisfamiliaris:0.074906[&&NHX:S=cfs] 
+):0.010572[&&NHX:D=N:S=314145:T=23:B=100] +):0.227145[&&NHX:D=N:S=root:T=5:B=49] +):4.96729[&&NHX:D=Y:SIS=100:DCS=1.0000:S=root:T=31:B=100], +(((brat1_rattusnorvegicus:0.027406[&&NHX:S=rtn], +brat1_musmusculus:0.035144[&&NHX:S=mms] +):0.174589[&&NHX:D=N:S=39107:T=31:B=100], +(brat1_homosapiens:0.003983[&&NHX:S=hms], +brat1_pantroglodytes:0.009203[&&NHX:S=ptr] +):0.096543[&&NHX:D=N:S=207598:T=31:B=100] +):0.034504[&&NHX:D=N:S=314146:T=5:B=21], +(brat1_susscrofa:0.106917[&&NHX:S=ssf], +brat1_canisfamiliaris:0.098716[&&NHX:S=cfs] +):0.029758[&&NHX:D=N:S=314145:T=31:B=100] +):5.24228[&&NHX:D=N:S=root:T=5:B=21] +):1.10311[&&NHX:D=Y:SIS=100:DCS=1.0000:S=root:T=29:B=100] +)[&&NHX:D=Y:SIS=83:DCS=0.8333:S=root:B=0]; diff -r 03c10736e497 -r 077021c45b96 test-data/speciestree.nhx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/speciestree.nhx Mon Mar 12 12:51:48 2018 -0400 @@ -0,0 +1,1 @@ +(((pantroglodytes,homosapiens),(rattusnorvegicus,musmusculus)),(canisfamiliaris,susscrofa));