diff get_sequences.py @ 8:935de83b470b draft default tip

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
author earlhaminst
date Mon, 17 Feb 2025 14:49:15 +0000
parents 0618e3bd5138
children
line wrap: on
line diff
--- a/get_sequences.py	Mon Dec 05 16:29:26 2022 +0000
+++ b/get_sequences.py	Mon Feb 17 14:49:15 2025 +0000
@@ -1,42 +1,60 @@
 # A simple tool to connect to the Ensembl server and retrieve sequences using
 # the Ensembl REST API.
-from __future__ import print_function
-
 import json
 import optparse
 from itertools import islice
+from urllib.parse import urljoin
 
 import requests
-from six.moves.urllib.parse import urljoin
 
 parser = optparse.OptionParser()
-parser.add_option('-i', '--input', help='List of Ensembl IDs')
+parser.add_option("-i", "--input", help="List of Ensembl IDs")
 
-parser.add_option('-t', '--type', type='choice',
-                  choices=['genomic', 'cds', 'cdna', 'protein'],
-                  default='genomic', help='Type of sequence')
-parser.add_option('--expand_3prime', type='int', default=0,
-                  help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
-parser.add_option('--expand_5prime', type='int', default=0,
-                  help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
+parser.add_option(
+    "-t",
+    "--type",
+    type="choice",
+    choices=["genomic", "cds", "cdna", "protein"],
+    default="genomic",
+    help="Type of sequence",
+)
+parser.add_option(
+    "--expand_3prime",
+    type="int",
+    default=0,
+    help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type",
+)
+parser.add_option(
+    "--expand_5prime",
+    type="int",
+    default=0,
+    help="Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type",
+)
 options, args = parser.parse_args()
 if options.input is None:
-    raise Exception('-i option must be specified')
+    raise Exception("-i option must be specified")
+
+server = "https://rest.ensembl.org"
+ext = "sequence/id"
 
-server = 'https://rest.ensembl.org'
-ext = 'sequence/id'
-
-headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
-params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime'])
+headers = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"}
+params = {
+    k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime")
+}
 with open(options.input) as f:
     # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl
     while True:
         ids = [line.strip() for line in islice(f, 50)]
         if not ids:
             break
-        data = {'ids': ids}
-        r = requests.post(urljoin(server, ext), params=params, headers=headers,
-                          data=json.dumps(data), allow_redirects=False)
+        data = {"ids": ids}
+        r = requests.post(
+            urljoin(server, ext),
+            params=params,
+            headers=headers,
+            data=json.dumps(data),
+            allow_redirects=False,
+        )
 
         if not r.ok:
             r.raise_for_status()