Mercurial > repos > galaxyp > retrieve_ensembl_bed
annotate ensembl_rest.py @ 1:9c4a48f5d4e7 draft default tip
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 6babd357845126292cb202aaea0f70ff68819525"
| author | galaxyp | 
|---|---|
| date | Mon, 07 Oct 2019 16:14:39 -0400 | 
| parents | da1b538b87e5 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
2 """ | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
3 # | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
4 #------------------------------------------------------------------------------ | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
5 # University of Minnesota | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
6 # Copyright 2017, Regents of the University of Minnesota | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
7 #------------------------------------------------------------------------------ | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
8 # Author: | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
9 # | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
10 # James E Johnson | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
11 # | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
12 #------------------------------------------------------------------------------ | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
13 """ | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
14 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
15 from __future__ import print_function | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
16 from __future__ import unicode_literals | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
17 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
18 import sys | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
19 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
20 from time import sleep | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
21 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
22 import requests | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
23 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
24 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
# Base URL of the Ensembl REST service; every request path is appended to it.
server = "https://rest.ensembl.org"
# Module-level default endpoint (each function below builds its own local ext).
ext = "/info/assembly/homo_sapiens?"
# Step size used by get_transcripts_bed when splitting a span into regions.
max_region = 4 * 1000 * 1000
# When true, ensembl_rest echoes each request path to stderr.
debug = False
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
29 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
30 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def ensembl_rest(ext, headers):
    """Send a GET request to the Ensembl REST server and return the response.

    Args:
        ext: Request path (plus query string) appended to the module-level
            ``server`` URL.
        headers: Dict of HTTP request headers handed to ``requests.get``.

    Returns:
        The ``requests`` response object for the (possibly retried) request.

    Raises:
        requests.HTTPError: via ``raise_for_status`` when the final response
            is not OK.

    A 429 response that carries a ``Retry-After`` header is retried exactly
    once, after waiting for the advertised delay.
    """
    if debug:
        print("%s" % ext, file=sys.stderr)
    url = server + ext
    r = requests.get(url, headers=headers)
    if r.status_code == 429:
        print("response headers: %s\n" % r.headers, file=sys.stderr)
        if 'Retry-After' in r.headers:
            # Header values are strings, but sleep() requires a number;
            # passing the raw header raised TypeError instead of waiting.
            try:
                delay = float(r.headers['Retry-After'])
            except ValueError:
                # Retry-After may also be an HTTP-date; rather than parse
                # it, fall back to a short fixed pause before retrying.
                delay = 1.0
            sleep(max(delay, 0.0))
            r = requests.get(url, headers=headers)
    if not r.ok:
        r.raise_for_status()
    return r
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
43 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
44 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_species():
    """Fetch the species list from the ``/info/species`` endpoint.

    Writes one tab-separated line per species to stdout (name, common_name,
    display_name, strain, taxon_id) and returns a dict that maps each
    species name to its full JSON record.
    """
    response = ensembl_rest("/info/species",
                            {"Content-Type": "application/json"})
    columns = ('name', 'common_name', 'display_name', 'strain', 'taxon_id')
    by_name = {}
    for record in response.json()['species']:
        by_name[record['name']] = record
        row = "\t".join("%s" % record[column] for column in columns)
        print(row, file=sys.stdout)
    return by_name
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
58 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
59 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_biotypes(species):
    """Return the biotype names reported by ``/info/biotypes/<species>``.

    Entries of the JSON response that have no 'biotype' key are skipped;
    the remaining values are returned in response order.
    """
    response = ensembl_rest("/info/biotypes/%s?" % species,
                            {"Content-Type": "application/json"})
    return [item['biotype'] for item in response.json()
            if 'biotype' in item]
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
69 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
70 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_toplevel(species):
    """Return top-level sequence lengths grouped by coordinate system.

    Queries ``/info/assembly/<species>`` and builds a nested dict of the
    form ``{coord_system: {sequence_name: length}}`` from the response's
    'top_level_region' list, with each length converted to int.
    """
    response = ensembl_rest("/info/assembly/%s?" % species,
                            {"Content-Type": "application/json"})
    lengths_by_system = {}
    for region in response.json()['top_level_region']:
        # setdefault creates the inner dict the first time a system is seen.
        sequences = lengths_by_system.setdefault(region['coord_system'], {})
        sequences[region['name']] = int(region['length'])
    return lengths_by_system
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
83 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
84 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_transcripts_bed(species, refseq, start, length, strand='',
                        params=None):
    """Collect BED lines for transcripts overlapping ``start``..``length``.

    The span is split at multiples of the module-level ``max_region`` and
    one ``/overlap/region`` request (``text/x-bed``) is issued per chunk.
    The non-empty response bodies are split into lines and concatenated in
    request order.

    Args:
        species: Species name placed in the request path.
        refseq: Sequence (region) name placed in the request path.
        start: First coordinate of the first chunk.
        length: Final end coordinate.
        strand: Text inserted verbatim after the coordinate range.
        params: Extra query-string text appended after ``feature=transcript;``.

    Returns:
        List of BED text lines.
    """
    extra = params or ''
    bed_header = {"Content-Type": "text/x-bed"}
    boundaries = list(range(start, length, max_region))
    if not boundaries or boundaries[-1] < length:
        boundaries.append(length)
    lines = []
    chunk_start = start
    # boundaries[0] is the initial start; every later value closes a chunk.
    # NOTE(review): a transcript spanning a chunk boundary presumably shows
    # up in both responses -- confirm whether callers de-duplicate.
    for chunk_end in boundaries[1:]:
        path = "/overlap/region/%s/%s:%d-%d%s?feature=transcript;%s" % (
            species, refseq, chunk_start, chunk_end, strand, extra)
        chunk_start = chunk_end + 1
        body = ensembl_rest(path, bed_header).text
        if body:
            lines.extend(body.splitlines())
    return lines
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
101 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
102 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_seq(id, seqtype, params=None):
    """Return the plain-text body of ``/sequence/id/<id>`` for ``seqtype``.

    ``params``, when given, is appended verbatim to the query string after
    ``type=<seqtype>;``.
    """
    extra = params or ''
    path = "/sequence/id/%s?type=%s;%s" % (id, seqtype, extra)
    return ensembl_rest(path, {"Content-Type": "text/plain"}).text
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
109 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
110 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_cdna(id, params=None):
    """Return the 'cdna' sequence text for ``id`` (see get_seq)."""
    sequence = get_seq(id, 'cdna', params)
    return sequence
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
113 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
114 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_cds(id, params=None):
    """Return the 'cds' sequence text for ``id`` (see get_seq)."""
    sequence = get_seq(id, 'cds', params)
    return sequence
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
117 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
118 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_genomic(id, params=None):
    """Return the 'genomic' sequence text for ``id`` (see get_seq)."""
    sequence = get_seq(id, 'genomic', params)
    return sequence
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
121 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
122 | 
| 
 
da1b538b87e5
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
 
galaxyp 
parents:  
diff
changeset
 | 
def get_transcript_haplotypes(species, transcript):
    """Return the decoded JSON from the transcript_haplotypes endpoint.

    Requests ``/transcript_haplotypes/<species>/<transcript>`` with
    ``aligned_sequences=1`` and hands back the parsed response unchanged.
    """
    path = "/transcript_haplotypes/%s/%s?aligned_sequences=1" % (
        species, transcript)
    response = ensembl_rest(path, {"Content-Type": "application/json"})
    return response.json()
