Mercurial > repos > iuc > data_manager_funannotate
comparison data_manager/funannotate.py @ 1:8dff71edbce5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_funannotate commit d1ebc78155f57c87d8e82c9855b176428e9803ad"
author | iuc |
---|---|
date | Thu, 18 Nov 2021 21:55:43 +0000 |
parents | ef7f9e2f32f2 |
children | 13018941c6a0 |
comparison
equal
deleted
inserted
replaced
0:ef7f9e2f32f2 | 1:8dff71edbce5 |
---|---|
4 import json | 4 import json |
5 import operator | 5 import operator |
6 import os | 6 import os |
7 import subprocess | 7 import subprocess |
8 import sys | 8 import sys |
9 import tarfile | |
9 from datetime import datetime | 10 from datetime import datetime |
11 | |
12 import requests | |
13 | |
14 # Some additional BUSCO/OrthoDB v10 datasets that can be added to the funannotate db. | |
15 # They will probably no longer be needed once a future version of funannotate | |
16 # uses a recent BUSCO version. | |
17 BUSCO_10_DATASETS_URL = "https://busco-data.ezlab.org/v5/data/lineages/{dataset}" | |
18 BUSCO_10_DATASETS = [ | |
19 "acidobacteria_odb10.2020-03-06.tar.gz", | |
20 "aconoidasida_odb10.2020-08-05.tar.gz", | |
21 "actinobacteria_class_odb10.2021-02-23.tar.gz", | |
22 "actinobacteria_phylum_odb10.2021-02-23.tar.gz", | |
23 "actinopterygii_odb10.2021-02-19.tar.gz", | |
24 "agaricales_odb10.2020-08-05.tar.gz", | |
25 "agaricomycetes_odb10.2020-08-05.tar.gz", | |
26 "alphabaculovirus_odb10.2020-11-26.tar.gz", | |
27 "alphaherpesvirinae_odb10.2020-11-26.tar.gz", | |
28 "alphaproteobacteria_odb10.2021-02-23.tar.gz", | |
29 "alteromonadales_odb10.2021-02-23.tar.gz", | |
30 "alveolata_odb10.2020-09-10.tar.gz", | |
31 "apicomplexa_odb10.2020-09-10.tar.gz", | |
32 "aquificae_odb10.2021-02-23.tar.gz", | |
33 "arachnida_odb10.2020-08-05.tar.gz", | |
34 "archaea_odb10.2021-02-23.tar.gz", | |
35 "arthropoda_odb10.2020-09-10.tar.gz", | |
36 "ascomycota_odb10.2020-09-10.tar.gz", | |
37 "aves_odb10.2021-02-19.tar.gz", | |
38 "aviadenovirus_odb10.2020-11-26.tar.gz", | |
39 "bacillales_odb10.2021-02-23.tar.gz", | |
40 "bacilli_odb10.2021-02-23.tar.gz", | |
41 "bacteria_odb10.2020-03-06.tar.gz", | |
42 "bacteroidales_odb10.2021-02-23.tar.gz", | |
43 "bacteroidetes-chlorobi_group_odb10.2021-02-23.tar.gz", | |
44 "bacteroidetes_odb10.2021-02-23.tar.gz", | |
45 "bacteroidia_odb10.2021-02-23.tar.gz", | |
46 "baculoviridae_odb10.2020-11-26.tar.gz", | |
47 "basidiomycota_odb10.2020-09-10.tar.gz", | |
48 "bclasvirinae_odb10.2020-11-26.tar.gz", | |
49 "betabaculovirus_odb10.2020-11-26.tar.gz", | |
50 "betaherpesvirinae_odb10.2020-11-26.tar.gz", | |
51 "betaproteobacteria_odb10.2021-02-23.tar.gz", | |
52 "boletales_odb10.2020-08-05.tar.gz", | |
53 "brassicales_odb10.2020-08-05.tar.gz", | |
54 "burkholderiales_odb10.2021-02-23.tar.gz", | |
55 "campylobacterales_odb10.2020-03-06.tar.gz", | |
56 "capnodiales_odb10.2020-08-05.tar.gz", | |
57 "carnivora_odb10.2021-02-19.tar.gz", | |
58 "cellvibrionales_odb10.2020-03-06.tar.gz", | |
59 "cetartiodactyla_odb10.2021-02-19.tar.gz", | |
60 "chaetothyriales_odb10.2020-08-05.tar.gz", | |
61 "cheoctovirus_odb10.2020-11-26.tar.gz", | |
62 "chlamydiae_odb10.2020-03-06.tar.gz", | |
63 "chlorobi_odb10.2020-03-06.tar.gz", | |
64 "chloroflexi_odb10.2020-03-06.tar.gz", | |
65 "chlorophyta_odb10.2020-08-05.tar.gz", | |
66 "chordopoxvirinae_odb10.2020-11-26.tar.gz", | |
67 "chromatiales_odb10.2020-03-06.tar.gz", | |
68 "chroococcales_odb10.2020-03-06.tar.gz", | |
69 "clostridia_odb10.2020-03-06.tar.gz", | |
70 "clostridiales_odb10.2020-03-06.tar.gz", | |
71 "coccidia_odb10.2020-08-05.tar.gz", | |
72 "coriobacteriales_odb10.2020-03-06.tar.gz", | |
73 "coriobacteriia_odb10.2020-03-06.tar.gz", | |
74 "corynebacteriales_odb10.2020-03-06.tar.gz", | |
75 "cyanobacteria_odb10.2021-02-23.tar.gz", | |
76 "cyprinodontiformes_odb10.2021-02-19.tar.gz", | |
77 "cytophagales_odb10.2021-02-23.tar.gz", | |
78 "cytophagia_odb10.2021-02-23.tar.gz", | |
79 "delta-epsilon-subdivisions_odb10.2021-02-23.tar.gz", | |
80 "deltaproteobacteria_odb10.2021-02-23.tar.gz", | |
81 "desulfobacterales_odb10.2020-03-06.tar.gz", | |
82 "desulfovibrionales_odb10.2021-02-23.tar.gz", | |
83 "desulfurococcales_odb10.2021-02-23.tar.gz", | |
84 "desulfuromonadales_odb10.2020-03-06.tar.gz", | |
85 "diptera_odb10.2020-08-05.tar.gz", | |
86 "dothideomycetes_odb10.2020-08-05.tar.gz", | |
87 "embryophyta_odb10.2020-09-10.tar.gz", | |
88 "endopterygota_odb10.2020-09-10.tar.gz", | |
89 "enquatrovirus_odb10.2021-05-05.tar.gz", | |
90 "enterobacterales_odb10.2021-02-23.tar.gz", | |
91 "entomoplasmatales_odb10.2020-03-06.tar.gz", | |
92 "epsilonproteobacteria_odb10.2020-03-06.tar.gz", | |
93 "euarchontoglires_odb10.2021-02-19.tar.gz", | |
94 "eudicots_odb10.2020-09-10.tar.gz", | |
95 "euglenozoa_odb10.2020-08-05.tar.gz", | |
96 "eukaryota_odb10.2020-09-10.tar.gz", | |
97 "eurotiales_odb10.2020-08-05.tar.gz", | |
98 "eurotiomycetes_odb10.2020-08-05.tar.gz", | |
99 "euryarchaeota_odb10.2021-02-23.tar.gz", | |
100 "eutheria_odb10.2021-02-19.tar.gz", | |
101 "fabales_odb10.2020-08-05.tar.gz", | |
102 "firmicutes_odb10.2021-02-23.tar.gz", | |
103 "flavobacteriales_odb10.2021-02-23.tar.gz", | |
104 "flavobacteriia_odb10.2021-02-23.tar.gz", | |
105 "fromanvirus_odb10.2020-11-26.tar.gz", | |
106 "fungi_odb10.2021-06-28.tar.gz", | |
107 "fusobacteria_odb10.2020-03-06.tar.gz", | |
108 "fusobacteriales_odb10.2020-03-06.tar.gz", | |
109 "gammaherpesvirinae_odb10.2020-11-26.tar.gz", | |
110 "gammaproteobacteria_odb10.2021-02-23.tar.gz", | |
111 "glires_odb10.2021-02-19.tar.gz", | |
112 "glomerellales_odb10.2020-08-05.tar.gz", | |
113 "guernseyvirinae_odb10.2020-11-26.tar.gz", | |
114 "halobacteria_odb10.2021-02-23.tar.gz", | |
115 "halobacteriales_odb10.2021-02-23.tar.gz", | |
116 "haloferacales_odb10.2021-02-23.tar.gz", | |
117 "helotiales_odb10.2020-08-05.tar.gz", | |
118 "hemiptera_odb10.2020-08-05.tar.gz", | |
119 "herpesviridae_odb10.2020-11-26.tar.gz", | |
120 "hymenoptera_odb10.2020-08-05.tar.gz", | |
121 "hypocreales_odb10.2020-08-05.tar.gz", | |
122 "insecta_odb10.2020-09-10.tar.gz", | |
123 "iridoviridae_odb10.2020-11-26.tar.gz", | |
124 "lactobacillales_odb10.2020-03-06.tar.gz", | |
125 "laurasiatheria_odb10.2021-02-19.tar.gz", | |
126 "legionellales_odb10.2020-03-06.tar.gz", | |
127 "leotiomycetes_odb10.2020-08-05.tar.gz", | |
128 "lepidoptera_odb10.2020-08-05.tar.gz", | |
129 "liliopsida_odb10.2020-09-10.tar.gz", | |
130 "mammalia_odb10.2021-02-19.tar.gz", | |
131 "metazoa_odb10.2021-02-24.tar.gz", | |
132 "methanobacteria_odb10.2021-02-23.tar.gz", | |
133 "methanococcales_odb10.2021-02-23.tar.gz", | |
134 "methanomicrobia_odb10.2021-02-23.tar.gz", | |
135 "methanomicrobiales_odb10.2021-02-23.tar.gz", | |
136 "micrococcales_odb10.2021-02-23.tar.gz", | |
137 "microsporidia_odb10.2020-08-05.tar.gz", | |
138 "mollicutes_odb10.2020-03-06.tar.gz", | |
139 "mollusca_odb10.2020-08-05.tar.gz", | |
140 "mucorales_odb10.2020-08-05.tar.gz", | |
141 "mucoromycota_odb10.2020-08-05.tar.gz", | |
142 "mycoplasmatales_odb10.2020-03-06.tar.gz", | |
143 "natrialbales_odb10.2021-02-23.tar.gz", | |
144 "neisseriales_odb10.2021-02-23.tar.gz", | |
145 "nematoda_odb10.2020-08-05.tar.gz", | |
146 "nitrosomonadales_odb10.2020-03-06.tar.gz", | |
147 "nostocales_odb10.2020-03-06.tar.gz", | |
148 "oceanospirillales_odb10.2020-03-06.tar.gz", | |
149 "onygenales_odb10.2020-08-05.tar.gz", | |
150 "oscillatoriales_odb10.2021-02-23.tar.gz", | |
151 "pahexavirus_odb10.2020-11-26.tar.gz", | |
152 "passeriformes_odb10.2021-02-19.tar.gz", | |
153 "pasteurellales_odb10.2021-02-23.tar.gz", | |
154 "peduovirus_odb10.2021-02-23.tar.gz", | |
155 "planctomycetes_odb10.2020-03-06.tar.gz", | |
156 "plasmodium_odb10.2020-08-05.tar.gz", | |
157 "pleosporales_odb10.2020-08-05.tar.gz", | |
158 "poales_odb10.2020-08-05.tar.gz", | |
159 "polyporales_odb10.2020-08-05.tar.gz", | |
160 "poxviridae_odb10.2020-11-26.tar.gz", | |
161 "primates_odb10.2021-02-19.tar.gz", | |
162 "propionibacteriales_odb10.2020-03-06.tar.gz", | |
163 "proteobacteria_odb10.2021-02-23.tar.gz", | |
164 "pseudomonadales_odb10.2020-03-06.tar.gz", | |
165 "rhizobiales_odb10.2020-03-06.tar.gz", | |
166 "rhizobium-agrobacterium_group_odb10.2020-03-06.tar.gz", | |
167 "rhodobacterales_odb10.2021-02-23.tar.gz", | |
168 "rhodospirillales_odb10.2020-03-06.tar.gz", | |
169 "rickettsiales_odb10.2020-03-06.tar.gz", | |
170 "rudiviridae_odb10.2020-11-26.tar.gz", | |
171 "saccharomycetes_odb10.2020-08-05.tar.gz", | |
172 "sauropsida_odb10.2021-02-19.tar.gz", | |
173 "selenomonadales_odb10.2020-03-06.tar.gz", | |
174 "simplexvirus_odb10.2020-11-26.tar.gz", | |
175 "skunavirus_odb10.2020-11-26.tar.gz", | |
176 "solanales_odb10.2020-08-05.tar.gz", | |
177 "sordariomycetes_odb10.2020-08-05.tar.gz", | |
178 "sphingobacteriia_odb10.2020-03-06.tar.gz", | |
179 "sphingomonadales_odb10.2021-02-23.tar.gz", | |
180 "spirochaetales_odb10.2020-03-06.tar.gz", | |
181 "spirochaetes_odb10.2021-02-23.tar.gz", | |
182 "spirochaetia_odb10.2021-02-23.tar.gz", | |
183 "spounavirinae_odb10.2020-11-26.tar.gz", | |
184 "stramenopiles_odb10.2020-08-05.tar.gz", | |
185 "streptomycetales_odb10.2020-03-06.tar.gz", | |
186 "streptosporangiales_odb10.2020-03-06.tar.gz", | |
187 "sulfolobales_odb10.2021-02-23.tar.gz", | |
188 "synechococcales_odb10.2020-03-06.tar.gz", | |
189 "synergistetes_odb10.2020-03-06.tar.gz", | |
190 "tenericutes_odb10.2020-03-06.tar.gz", | |
191 "tequatrovirus_odb10.2020-11-26.tar.gz", | |
192 "teseptimavirus_odb10.2020-11-26.tar.gz", | |
193 "tetrapoda_odb10.2021-02-19.tar.gz", | |
194 "tevenvirinae_odb10.2021-02-23.tar.gz", | |
195 "thaumarchaeota_odb10.2021-02-23.tar.gz", | |
196 "thermoanaerobacterales_odb10.2020-03-06.tar.gz", | |
197 "thermoplasmata_odb10.2021-02-23.tar.gz", | |
198 "thermoproteales_odb10.2021-02-23.tar.gz", | |
199 "thermoprotei_odb10.2021-02-23.tar.gz", | |
200 "thermotogae_odb10.2020-03-06.tar.gz", | |
201 "thiotrichales_odb10.2020-03-06.tar.gz", | |
202 "tissierellales_odb10.2020-03-06.tar.gz", | |
203 "tissierellia_odb10.2020-03-06.tar.gz", | |
204 "tremellomycetes_odb10.2020-08-05.tar.gz", | |
205 "tunavirinae_odb10.2020-11-26.tar.gz", | |
206 "varicellovirus_odb10.2020-11-26.tar.gz", | |
207 "verrucomicrobia_odb10.2020-03-06.tar.gz", | |
208 "vertebrata_odb10.2021-02-19.tar.gz", | |
209 "vibrionales_odb10.2020-03-06.tar.gz", | |
210 "viridiplantae_odb10.2020-09-10.tar.gz", | |
211 "xanthomonadales_odb10.2020-03-06.tar.gz", | |
212 ] | |
213 | |
214 | |
def download_file(url, dest, timeout=60):
    """Stream the resource at ``url`` into the local file ``dest``.

    The response is requested in streaming mode and written to disk in
    8 KiB chunks.

    Args:
        url: Address to fetch with an HTTP GET request.
        dest: Path of the file to create (or overwrite) with the response body.
        timeout: Seconds to wait for the connection and for each read from the
            server before giving up. This is not a limit on the total
            download time.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server, which would hang the whole download step.
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Fail before ``dest`` is created when the server reports an error.
        r.raise_for_status()
        with open(dest, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
10 | 221 |
11 | 222 |
12 if __name__ == "__main__": | 223 if __name__ == "__main__": |
13 | 224 |
14 parser = argparse.ArgumentParser() | 225 parser = argparse.ArgumentParser() |
36 return_code = proc.wait() | 247 return_code = proc.wait() |
37 if return_code: | 248 if return_code: |
38 print("Error downloading Funannotate database.", file=sys.stderr) | 249 print("Error downloading Funannotate database.", file=sys.stderr) |
39 sys.exit(return_code) | 250 sys.exit(return_code) |
40 | 251 |
252 # Download newer busco datasets from orthodb 10 | |
253 if args.partial: | |
254 BUSCO_10_DATASETS = BUSCO_10_DATASETS[:1] | |
255 | |
256 for busco_dataset in BUSCO_10_DATASETS: | |
257 print("Downloading additional busco orthodb10 dataset %s" % busco_dataset) | |
258 dest_tar = os.path.join(output_directory, busco_dataset) | |
259 download_file(BUSCO_10_DATASETS_URL.format(dataset=busco_dataset), dest_tar) | |
260 print("Extracting %s" % busco_dataset) | |
261 tar = tarfile.open(dest_tar, "r:gz") | |
262 tar.extractall(output_directory) | |
263 tar.close() | |
264 os.remove(dest_tar) | |
265 | |
41 version_id = datetime.today().strftime('%Y-%m-%d-%H%M%S') | 266 version_id = datetime.today().strftime('%Y-%m-%d-%H%M%S') |
42 | 267 |
43 version = '1.0' | 268 version = '1.0' |
44 | 269 |
45 data_manager_dict["data_tables"][args.datatable_name].append( | 270 data_manager_dict["data_tables"][args.datatable_name].append( |