Mercurial > repos > earlhaminst > ete
annotate ete_genetree_splitter.py @ 16:e4d5d9a69c9d draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/main/tools/ete commit a0d7ab86b86bb764e457767bf8e8bc29868d0cbb
| author | earlhaminst |
|---|---|
| date | Mon, 10 Mar 2025 23:28:17 +0000 |
| parents | dc32007a6b36 |
| children |
| rev | line source |
|---|---|
|
3
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
1 from __future__ import print_function |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
2 |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
3 import optparse |
|
12
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
4 import os |
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
5 import sys |
|
3
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
6 |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
7 from ete3 import PhyloTree |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
8 |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
9 |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
def main():
    """Split a gene tree into sub-trees and write each one to its own file.

    Command-line entry point. The gene tree given by ``--genetree`` is read,
    optionally reconciled with ``--speciestree`` (``--gainlose``), and then
    split with the algorithm selected by ``--split``:

    * ``dups``    -- ``PhyloTree.split_by_dups()``
    * ``treeko``  -- ``PhyloTree.get_speciation_trees()``
    * ``species`` -- descend from the root while ``check_outgroup`` and
      ``check_ingroup`` both accept the current node; every node at which
      the descent stops is written out as one cluster.

    Each resulting tree is written to ``<n>_genetree.nhx`` (``n`` starting
    at 1), inside ``--dir`` when that option is given.

    Invalid option combinations are reported through ``parser.error``.
    An empty gene tree file ends the program through a bare ``sys.exit()``
    without producing any output file.
    """
    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    parser.add_option('--speciestree', help='Species Tree in nhx format')
    parser.add_option('--ingroup', help='Comma-separated list of ingroup species (required by --split species)')
    parser.add_option('--outgroup', help='Comma-separated list of outgroup species (used by --split species)')
    parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)')
    parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene')
    parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose')
    parser.add_option('--split', type='choice', choices=['dups', 'treeko', 'species'], dest="split", default='dups', help='Choose GeneTree splitting algorithms')
    parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)')
    parser.add_option('-d', '--dir', type='string', default="", help="Absolute or relative path to output directory. If directory does not exist it will be created")

    options, _args = parser.parse_args()

    # The output directory is created before any other check, so it exists
    # even when the run ends early; exist_ok avoids the check-then-create race.
    if options.dir:
        os.makedirs(options.dir, exist_ok=True)

    if options.genetree is None:
        parser.error("--genetree option must be specified, GeneTree in nhx format")

    # An empty input is not an error: stop here without writing any tree.
    if os.stat(options.genetree).st_size == 0:
        sys.exit()

    # Empty items are dropped, so a missing or empty --outgroup really means
    # "no outgroup constraint" instead of a list holding one empty name.
    ingroup = _parse_species_list(options.ingroup)
    outgroup = _parse_species_list(options.outgroup)
    if options.split == "species" and not ingroup:
        parser.error("--ingroup option must be specified when --split is 'species'")

    with open(options.genetree, 'r') as f:
        contents = f.read()

    # Remove empty NHX features that can be produced by TreeBest but break ete3
    contents = contents.replace('[&&NHX]', '')

    # reads single gene tree
    genetree = PhyloTree(contents)

    # sets species naming function
    if options.gene_node == 0:
        genetree.set_species_naming_function(parse_sp_name)

    # reconcile species tree with gene tree to help find out gene gain/lose
    if options.gainlose:
        if options.speciestree is None:
            parser.error("--speciestree option must be specified, species tree in nhx format")

        # reads species tree
        speciestree = PhyloTree(options.speciestree, format=options.species_format)

        # Removes the '*' that a species tree configured for TreeBest
        # carries on its species names
        for leaf in speciestree:
            leaf.name = leaf.name.strip('*')

        genetree, _events = genetree.reconcile(speciestree)

    subtrees = []
    if options.split == "dups":
        # splits tree by duplication events which returns the list of all
        # subtrees resulting from splitting current tree by its duplication nodes.
        subtrees = genetree.split_by_dups()
    elif options.split == "treeko":
        # splits tree using the TreeKO algorithm.
        _ntrees, _ndups, subtrees = genetree.get_speciation_trees()
    elif options.split == "species":
        subtrees = _iter_species_clusters(genetree, ingroup, outgroup)

    for cluster_id, subtree in enumerate(subtrees, start=1):
        _write_tree(subtree, cluster_id, options.dir, options.output_format)


def _parse_species_list(value):
    """Return the stripped, non-empty items of a comma-separated string."""
    if not value:
        return []
    items = (item.strip() for item in value.split(","))
    return [item for item in items if item]


def _write_tree(tree, cluster_id, out_dir, output_format):
    """Write *tree* to ``<cluster_id>_genetree.nhx``, inside *out_dir* if set."""
    outfile = '{}_genetree.nhx'.format(cluster_id)
    if out_dir:
        outfile = os.path.join(out_dir, outfile)
    with open(outfile, 'w') as f:
        f.write(tree.write(format=output_format))


def _iter_species_clusters(tree, ingroup, outgroup):
    """Yield, in depth-first order, the subtrees of the species-based split.

    A node is descended into while the outgroup check (skipped when
    *outgroup* is empty) and the ingroup check both accept it; otherwise the
    node itself is yielded as one cluster. An explicit stack is used so that
    nodes with any number of children are handled and deep trees do not hit
    the recursion limit.
    """
    pending = [tree]
    while pending:
        node = pending.pop()
        outgroup_ok = check_outgroup(node, outgroup) if outgroup else True
        if outgroup_ok and node.children and check_ingroup(node, ingroup):
            # Reversed so that children are visited left to right.
            pending.extend(reversed(node.children))
        else:
            yield node
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
106 |
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
107 |
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
def check_outgroup(tree, outgroup):
    """Tell whether enough outgroup species occur more than once under *tree*.

    The species of the leaves below *tree* are obtained with ``get_species``.
    An outgroup species counts when it appears more than once among them.
    Returns True when the number of such species is at least half the length
    of *outgroup*.
    """
    leaf_species = get_species(tree)
    repeated = sum(1 for name in outgroup if leaf_species.count(name) > 1)
    return repeated >= len(outgroup) / 2
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
118 |
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
119 |
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
def check_ingroup(tree, ingroup):
    """Tell whether *tree* holds an ingroup species more than once.

    The species of the leaves below *tree* are obtained with ``get_species``.
    An ingroup species counts when it appears more than once among them.
    Returns False when no species counts; otherwise returns the result of
    ``len(ingroup) / counted >= 0.8``.
    """
    leaf_species = get_species(tree)
    repeated = sum(1 for name in ingroup if leaf_species.count(name) > 1)
    if repeated == 0:
        return False
    # NOTE(review): `repeated` can never exceed len(ingroup), so this ratio is
    # always >= 1 and the 0.8 threshold never rejects anything. Possibly the
    # intended test was `repeated / len(ingroup) >= 0.8` -- confirm with the
    # authors before changing, as it would alter the produced clusters.
    return len(ingroup) / repeated >= 0.8
|
3
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
130 |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
131 |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
def parse_sp_name(node_name):
    """Return the part of *node_name* that follows its last underscore.

    When the name contains no underscore the whole name is returned.
    """
    pieces = node_name.rsplit("_", 1)
    return pieces[-1]
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
134 |
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
135 |
|
dc32007a6b36
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit c568584f1eaa1366603b89db7e52994812f5d387
earlhaminst
parents:
9
diff
changeset
|
def get_species(node):
    """Return the species suffix of every leaf name below *node*.

    Leaf names come from ``node.get_leaf_names()``. Names without an
    underscore are skipped, since gene tree leaves are required to be in
    gene_species format; for the others the text after the last underscore
    is kept. Order and duplicates are preserved.
    """
    return [
        leaf_name.split("_")[-1]
        for leaf_name in node.get_leaf_names()
        if '_' in leaf_name
    ]
|
3
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
144 |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
145 |
|
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
