annotate data_manager/extract.py @ 0:e22da646fed7 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_funannotate commit 0b6712733bce2e4ec6b276a6dec9c7b4bff5a5cd
author iuc
date Sun, 09 Mar 2025 09:34:24 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e22da646fed7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_funannotate commit 0b6712733bce2e4ec6b276a6dec9c7b4bff5a5cd
iuc
parents:
diff changeset
1 import json
e22da646fed7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_funannotate commit 0b6712733bce2e4ec6b276a6dec9c7b4bff5a5cd
iuc
parents:
diff changeset
2 import os
e22da646fed7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_funannotate commit 0b6712733bce2e4ec6b276a6dec9c7b4bff5a5cd
iuc
parents:
diff changeset
3 import sys
e22da646fed7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_funannotate commit 0b6712733bce2e4ec6b276a6dec9c7b4bff5a5cd
iuc
parents:
diff changeset
4
e22da646fed7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_funannotate commit 0b6712733bce2e4ec6b276a6dec9c7b4bff5a5cd
iuc
parents:
diff changeset
# Suffix identifying outgroup FASTA files inside <fun_db>/outgroups/.
BUSCOS_SUFFIX = "_buscos.fa"

# Sub-directories of the funannotate DB that are never BUSCO lineage folders.
NON_BUSCO_DIRS = ("outgroups", "trained_species")


def pretty_name(raw):
    """Return a display label for *raw*.

    Underscores become spaces and the first character is upper-cased; note
    that ``str.capitalize`` also lower-cases every remaining character.
    """
    return raw.replace("_", " ").capitalize()


def make_entry(value, name, select, fun_db_value):
    """Build one row of the ``funannotate_options`` data table."""
    return {'value': value, 'name': name, 'select': select, 'db_value': fun_db_value}


def subdirs_with_marker(parent, marker):
    """Return the sorted names of sub-directories of *parent* containing *marker*.

    ``os.scandir`` yields entries in arbitrary order, so the names are sorted
    to make the generated data table reproducible between runs.

    Raises:
        OSError: if *parent* cannot be scanned (e.g. it does not exist).
    """
    with os.scandir(parent) as entries:
        return sorted(
            entry.name
            for entry in entries
            if entry.is_dir() and os.path.exists(os.path.join(entry.path, marker))
        )


def busco_db_options(fun_db, fun_db_value):
    """Collect the options for parameter --busco_db.

    These are the sub-folders of the db dir that contain a ``dataset.cfg``
    file (minus outgroups/ and trained_species/).
    https://github.com/nextgenusfs/funannotate/blob/8cc40728fee61566fdf736c1f2292e14cc117660/funannotate/predict.py#L319
    """
    return [
        make_entry(dirname, pretty_name(dirname), 'busco_db', fun_db_value)
        for dirname in subdirs_with_marker(fun_db, "dataset.cfg")
        if dirname not in NON_BUSCO_DIRS
    ]


def trained_species_options(fun_db, fun_db_value):
    """Collect the options for --busco_seed_species.

    These are the sub-folders of ``<fun_db>/trained_species`` that contain an
    ``info.json`` file.
    """
    species_dir = os.path.join(fun_db, "trained_species")
    return [
        make_entry(dirname, pretty_name(dirname), 'trained_species', fun_db_value)
        for dirname in subdirs_with_marker(species_dir, "info.json")
    ]


def outgroup_options(fun_db, fun_db_value):
    """Collect the outgroup options.

    Every non-directory entry ``<value>_buscos.fa`` in ``<fun_db>/outgroups``
    yields one option. The label is built from the dot-separated parts of
    ``<value>``, each prettified and joined with `` - ``.

    Raises:
        OSError: if the ``outgroups`` directory cannot be scanned.
    """
    with os.scandir(os.path.join(fun_db, "outgroups")) as entries:
        values = sorted(
            entry.name[:-len(BUSCOS_SUFFIX)]
            for entry in entries
            if not entry.is_dir() and entry.name.endswith(BUSCOS_SUFFIX)
        )
    return [
        make_entry(
            value,
            ' - '.join(pretty_name(part) for part in value.split('.')),
            'outgroup',
            fun_db_value,
        )
        for value in values
    ]


def collect_options(fun_db, fun_db_value):
    """Return all data table rows: busco_db, then trained_species, then outgroup."""
    return (
        busco_db_options(fun_db, fun_db_value)
        + trained_species_options(fun_db, fun_db_value)
        + outgroup_options(fun_db, fun_db_value)
    )


def main(argv):
    """Scan a funannotate DB directory and write the data manager JSON.

    Args:
        argv: ``[script, fun_db, fun_db_value, dmjson]`` where *fun_db* is the
            database directory to scan, *fun_db_value* is stored verbatim in
            the ``db_value`` field of every row, and *dmjson* is the path of
            the JSON file to write. Extra trailing arguments are ignored.
    """
    if len(argv) < 4:
        sys.exit(f'usage: {argv[0] if argv else "extract.py"} FUN_DB FUN_DB_VALUE DM_JSON')
    fun_db, fun_db_value, dmjson = argv[1:4]

    content = collect_options(fun_db, fun_db_value)

    with open(dmjson, "w") as fh:
        json.dump({"data_tables": {"funannotate_options": content}}, fh)

    # One summary line per option kind; the explicit "\n" keeps the blank
    # line after each count that the script has always printed.
    for select in ('busco_db', 'trained_species', 'outgroup'):
        count = sum(1 for row in content if row["select"] == select)
        print(f'{count} x {select}\n')


if __name__ == "__main__":
    main(sys.argv)