Mercurial > repos > earlhaminst > ensembl_get_feature_info
annotate get_sequences.py @ 7:d6bb417dc831 draft default tip
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 17c65045b726d0695814bfe761e534f6521786f1"
author | earlhaminst |
---|---|
date | Tue, 20 Oct 2020 15:06:54 +0000 |
parents | 70e4134eb0ed |
children |
rev | line source |
---|---|
1
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
1 # A simple tool to connect to the Ensembl server and retrieve sequences using |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
2 # the Ensembl REST API. |
2
840ea71e6318
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents:
1
diff
changeset
|
3 from __future__ import print_function |
840ea71e6318
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents:
1
diff
changeset
|
4 |
1
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
5 import json |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
6 import optparse |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
7 from itertools import islice |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
8 |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
9 import requests |
2
840ea71e6318
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents:
1
diff
changeset
|
10 from six.moves.urllib.parse import urljoin |
1
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
11 |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
12 parser = optparse.OptionParser() |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
13 parser.add_option('-i', '--input', help='List of Ensembl IDs') |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
14 |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
15 parser.add_option('-t', '--type', type='choice', |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
16 choices=['genomic', 'cds', 'cdna', 'protein'], |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
17 default='genomic', help='Type of sequence') |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
18 parser.add_option('--expand_3prime', type='int', default=0, |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
19 help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type') |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
20 parser.add_option('--expand_5prime', type='int', default=0, |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
21 help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type') |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
22 options, args = parser.parse_args() |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
23 if options.input is None: |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
24 raise Exception('-i option must be specified') |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
25 |
7
d6bb417dc831
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 17c65045b726d0695814bfe761e534f6521786f1"
earlhaminst
parents:
6
diff
changeset
|
26 server = 'https://rest.ensembl.org' |
1
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
27 ext = 'sequence/id' |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
28 |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
29 headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'} |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
30 params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime']) |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
31 with open(options.input) as f: |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
32 # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
33 while True: |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
34 ids = [line.strip() for line in islice(f, 50)] |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
35 if not ids: |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
36 break |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
37 data = {'ids': ids} |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
38 r = requests.post(urljoin(server, ext), params=params, headers=headers, |
7
d6bb417dc831
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 17c65045b726d0695814bfe761e534f6521786f1"
earlhaminst
parents:
6
diff
changeset
|
39 data=json.dumps(data), allow_redirects=False) |
1
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
40 |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
41 if not r.ok: |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
42 r.raise_for_status() |
396f0f54d115
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
43 |
2
840ea71e6318
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents:
1
diff
changeset
|
44 print(r.text) |