Mercurial > repos > earlhaminst > ensembl_get_feature_info
comparison get_sequences.py @ 8:37cdb55f5258 draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
| author | earlhaminst | 
|---|---|
| date | Mon, 17 Feb 2025 14:49:19 +0000 | 
| parents | d6bb417dc831 | 
| children | |
   comparison
  equal
  deleted
  inserted
  replaced
| 7:d6bb417dc831 | 8:37cdb55f5258 | 
|---|---|
| 1 # A simple tool to connect to the Ensembl server and retrieve sequences using | 1 # A simple tool to connect to the Ensembl server and retrieve sequences using | 
| 2 # the Ensembl REST API. | 2 # the Ensembl REST API. | 
| 3 from __future__ import print_function | |
| 4 | |
| 5 import json | 3 import json | 
| 6 import optparse | 4 import optparse | 
| 7 from itertools import islice | 5 from itertools import islice | 
| 6 from urllib.parse import urljoin | |
| 8 | 7 | 
| 9 import requests | 8 import requests | 
| 10 from six.moves.urllib.parse import urljoin | |
| 11 | 9 | 
| 12 parser = optparse.OptionParser() | 10 parser = optparse.OptionParser() | 
| 13 parser.add_option('-i', '--input', help='List of Ensembl IDs') | 11 parser.add_option("-i", "--input", help="List of Ensembl IDs") | 
| 14 | 12 | 
| 15 parser.add_option('-t', '--type', type='choice', | 13 parser.add_option( | 
| 16 choices=['genomic', 'cds', 'cdna', 'protein'], | 14 "-t", | 
| 17 default='genomic', help='Type of sequence') | 15 "--type", | 
| 18 parser.add_option('--expand_3prime', type='int', default=0, | 16 type="choice", | 
| 19 help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type') | 17 choices=["genomic", "cds", "cdna", "protein"], | 
| 20 parser.add_option('--expand_5prime', type='int', default=0, | 18 default="genomic", | 
| 21 help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type') | 19 help="Type of sequence", | 
| 20 ) | |
| 21 parser.add_option( | |
| 22 "--expand_3prime", | |
| 23 type="int", | |
| 24 default=0, | |
| 25 help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type", | |
| 26 ) | |
| 27 parser.add_option( | |
| 28 "--expand_5prime", | |
| 29 type="int", | |
| 30 default=0, | |
| 31 help="Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type", | |
| 32 ) | |
| 22 options, args = parser.parse_args() | 33 options, args = parser.parse_args() | 
| 23 if options.input is None: | 34 if options.input is None: | 
| 24 raise Exception('-i option must be specified') | 35 raise Exception("-i option must be specified") | 
| 25 | 36 | 
| 26 server = 'https://rest.ensembl.org' | 37 server = "https://rest.ensembl.org" | 
| 27 ext = 'sequence/id' | 38 ext = "sequence/id" | 
| 28 | 39 | 
| 29 headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'} | 40 headers = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"} | 
| 30 params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime']) | 41 params = { | 
| 42 k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime") | |
| 43 } | |
| 31 with open(options.input) as f: | 44 with open(options.input) as f: | 
| 32 # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl | 45 # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl | 
| 33 while True: | 46 while True: | 
| 34 ids = [line.strip() for line in islice(f, 50)] | 47 ids = [line.strip() for line in islice(f, 50)] | 
| 35 if not ids: | 48 if not ids: | 
| 36 break | 49 break | 
| 37 data = {'ids': ids} | 50 data = {"ids": ids} | 
| 38 r = requests.post(urljoin(server, ext), params=params, headers=headers, | 51 r = requests.post( | 
| 39 data=json.dumps(data), allow_redirects=False) | 52 urljoin(server, ext), | 
| 53 params=params, | |
| 54 headers=headers, | |
| 55 data=json.dumps(data), | |
| 56 allow_redirects=False, | |
| 57 ) | |
| 40 | 58 | 
| 41 if not r.ok: | 59 if not r.ok: | 
| 42 r.raise_for_status() | 60 r.raise_for_status() | 
| 43 | 61 | 
| 44 print(r.text) | 62 print(r.text) | 
