Mercurial > repos > earlhaminst > ensembl_get_sequences
annotate get_sequences.py @ 7:c79ce2342f1e draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
| author | earlhaminst | 
|---|---|
| date | Mon, 17 Feb 2025 14:49:24 +0000 | 
| parents | 7af66c2b3831 | 
| children | 
| rev | line source | 
|---|---|
| 
1
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
1 # A simple tool to connect to the Ensembl server and retrieve sequences using | 
| 
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
2 # the Ensembl REST API. | 
| 
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
3 import json | 
| 
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
4 import optparse | 
| 
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
5 from itertools import islice | 
| 
7
 
c79ce2342f1e
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
 
earlhaminst 
parents: 
6 
diff
changeset
 | 
6 from urllib.parse import urljoin | 
| 
1
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
7 | 
| 
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
8 import requests | 
| 
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
9 | 
| 
 
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
# Base URL of the Ensembl REST service and the endpoint path used for
# batch sequence retrieval by stable ID.
SERVER = "https://rest.ensembl.org"
EXT = "sequence/id"

# Request and response are both declared as FASTA; the response body is
# written to stdout as-is.
HEADERS = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"}

# Ensembl limits the number of IDs accepted in a single POST, so the input
# is sent in chunks of at most this many IDs.
MAX_IDS_PER_REQUEST = 50

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the job forever.
REQUEST_TIMEOUT = 300


def build_parser():
    """Return the command-line option parser for this tool."""
    parser = optparse.OptionParser()
    parser.add_option("-i", "--input", help="List of Ensembl IDs")
    parser.add_option(
        "-t",
        "--type",
        type="choice",
        choices=["genomic", "cds", "cdna", "protein"],
        default="genomic",
        help="Type of sequence",
    )
    parser.add_option(
        "--expand_3prime",
        type="int",
        default=0,
        help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type",
    )
    parser.add_option(
        "--expand_5prime",
        type="int",
        default=0,
        help="Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type",
    )
    return parser


def iter_id_chunks(lines, size=MAX_IDS_PER_REQUEST):
    """Yield lists of at most *size* IDs read from *lines*.

    Each line is stripped of surrounding whitespace. Blank lines are
    skipped so that empty strings are never sent to the server as IDs.
    *lines* may be any iterable of strings (e.g. an open text file); it is
    consumed lazily, one chunk at a time.
    """
    stripped = (line.strip() for line in lines)
    ids = (identifier for identifier in stripped if identifier)
    while True:
        chunk = list(islice(ids, size))
        if not chunk:
            return
        yield chunk


def fetch_fasta(url, ids, params):
    """POST one chunk of IDs to *url* and return the response text.

    *params* is sent as the query string. Raises ``requests.HTTPError``
    when the server answers with a 4xx/5xx status.
    """
    response = requests.post(
        url,
        params=params,
        headers=HEADERS,
        data=json.dumps({"ids": ids}),
        allow_redirects=False,
        timeout=REQUEST_TIMEOUT,
    )
    # raise_for_status() is a no-op on success, so no separate ``ok`` check
    # is needed.
    response.raise_for_status()
    return response.text


def main(argv=None):
    """Read IDs from the input file and print their sequences to stdout.

    *argv* defaults to ``sys.argv[1:]`` (optparse behaviour when None).
    Exits with status 2 and a usage message if ``-i`` is missing.
    """
    parser = build_parser()
    options, _ = parser.parse_args(argv)
    if options.input is None:
        parser.error("-i option must be specified")

    params = {
        k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime")
    }
    url = urljoin(SERVER, EXT)
    with open(options.input) as f:
        for ids in iter_id_chunks(f):
            print(fetch_fasta(url, ids, params))


if __name__ == "__main__":
    main()
