Mercurial > repos > earlhaminst > ensembl_get_feature_info
comparison get_genetree.py @ 8:37cdb55f5258 draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
| author | earlhaminst |
|---|---|
| date | Mon, 17 Feb 2025 14:49:19 +0000 |
| parents | d6bb417dc831 |
| children | |
comparison
equal
deleted
inserted
replaced
| 7:d6bb417dc831 | 8:37cdb55f5258 |
|---|---|
| 1 # A simple tool to connect to the Ensembl server and retrieve genetree using | 1 # A simple tool to connect to the Ensembl server and retrieve genetree using |
| 2 # the Ensembl REST API. | 2 # the Ensembl REST API. |
| 3 from __future__ import print_function | |
| 4 | |
| 5 import optparse | 3 import optparse |
| 4 from urllib.parse import urljoin | |
| 6 | 5 |
| 7 import requests | 6 import requests |
| 8 from six.moves.urllib.parse import urljoin | |
| 9 | 7 |
| 10 parser = optparse.OptionParser() | 8 parser = optparse.OptionParser() |
| 11 parser.add_option('--id_type', type='choice', default='gene_id', | 9 parser.add_option( |
| 12 choices=['gene_id', 'gene_tree_id'], help='Input type') | 10 "--id_type", |
| 13 parser.add_option('-i', '--input', help='Ensembl ID') | 11 type="choice", |
| 14 parser.add_option('--format', type='choice', | 12 default="gene_id", |
| 15 choices=['json', 'orthoxml', 'phyloxml', 'nh'], | 13 choices=["gene_id", "gene_tree_id"], |
| 16 default='json', help='Output format') | 14 help="Input type", |
| 17 parser.add_option('-s', '--sequence', type='choice', | 15 ) |
| 18 choices=['protein', 'cdna', 'none'], default='protein', | 16 parser.add_option("--species", help="Species name/alias") |
| 19 help='The type of sequence to bring back. Setting it to none results in no sequence being returned') | 17 parser.add_option("-i", "--input", help="Ensembl ID") |
| 18 parser.add_option( | |
| 19 "--format", | |
| 20 type="choice", | |
| 21 choices=["json", "orthoxml", "phyloxml", "nh"], | |
| 22 default="json", | |
| 23 help="Output format", | |
| 24 ) | |
| 25 parser.add_option( | |
| 26 "-s", | |
| 27 "--sequence", | |
| 28 type="choice", | |
| 29 choices=["protein", "cdna", "none"], | |
| 30 default="protein", | |
| 31 help="The type of sequence to bring back. Setting it to none results in no sequence being returned", | |
| 32 ) | |
| 20 | 33 |
| 21 parser.add_option('-a', '--aligned', type='choice', choices=['0', '1'], | 34 parser.add_option( |
| 22 default='0', help='Return the aligned string if true. Otherwise, return the original sequence (no insertions)') | 35 "-a", |
| 23 parser.add_option('-c', '--cigar_line', type='choice', choices=['0', '1'], | 36 "--aligned", |
| 24 default='0', | 37 type="choice", |
| 25 help='Return the aligned sequence encoded in CIGAR format') | 38 choices=["0", "1"], |
| 26 parser.add_option('--nh_format', type='choice', | 39 default="0", |
| 27 choices=['full', 'display_label_composite', 'simple', 'species', 'species_short_name', 'ncbi_taxon', 'ncbi_name', 'njtree', 'phylip'], | 40 help="Return the aligned string if true. Otherwise, return the original sequence (no insertions)", |
| 28 default='simple', | 41 ) |
| 29 help='The format of a NH (New Hampshire) request') | 42 parser.add_option( |
| 43 "-c", | |
| 44 "--cigar_line", | |
| 45 type="choice", | |
| 46 choices=["0", "1"], | |
| 47 default="0", | |
| 48 help="Return the aligned sequence encoded in CIGAR format", | |
| 49 ) | |
| 50 parser.add_option( | |
| 51 "--nh_format", | |
| 52 type="choice", | |
| 53 choices=[ | |
| 54 "full", | |
| 55 "display_label_composite", | |
| 56 "simple", | |
| 57 "species", | |
| 58 "species_short_name", | |
| 59 "ncbi_taxon", | |
| 60 "ncbi_name", | |
| 61 "njtree", | |
| 62 "phylip", | |
| 63 ], | |
| 64 default="simple", | |
| 65 help="The format of a NH (New Hampshire) request", | |
| 66 ) | |
| 30 options, args = parser.parse_args() | 67 options, args = parser.parse_args() |
| 31 if options.input is None: | 68 if options.input is None: |
| 32 raise Exception('-i option must be specified') | 69 raise Exception("-i option must be specified") |
| 33 | 70 |
| 34 server = 'https://rest.ensembl.org' | 71 server = "https://rest.ensembl.org" |
| 35 | 72 |
| 36 if options.id_type == 'gene_id': | 73 if options.id_type == "gene_id": |
| 37 ext = 'genetree/member/id' | 74 ext = f"genetree/member/id/{options.species}/{options.input}" |
| 38 elif options.id_type == 'gene_tree_id': | 75 elif options.id_type == "gene_tree_id": |
| 39 ext = 'genetree/id' | 76 ext = f"genetree/id/{options.input}" |
| 40 | 77 |
| 41 if options.format == 'json': | 78 if options.format == "json": |
| 42 content_type = 'application/json' | 79 content_type = "application/json" |
| 43 elif options.format == 'orthoxml': | 80 elif options.format == "orthoxml": |
| 44 content_type = 'text/x-orthoxml+xml' | 81 content_type = "text/x-orthoxml+xml" |
| 45 elif options.format == 'phyloxml': | 82 elif options.format == "phyloxml": |
| 46 content_type = 'text/x-phyloxml+xml' | 83 content_type = "text/x-phyloxml+xml" |
| 47 elif options.format == 'nh': | 84 elif options.format == "nh": |
| 48 content_type = 'text/x-nh' | 85 content_type = "text/x-nh" |
| 49 headers = {'Content-Type': content_type} | 86 headers = {"Content-Type": content_type} |
| 50 params = dict((k, getattr(options, k)) for k in ['sequence', 'aligned', 'cigar_line', 'nh_format']) | 87 params = { |
| 51 r = requests.get(urljoin(server, '/'.join([ext, options.input])), params=params, headers=headers) | 88 k: getattr(options, k) for k in ("sequence", "aligned", "cigar_line", "nh_format") |
| 89 } | |
| 90 r = requests.get(urljoin(server, ext), params=params, headers=headers) | |
| 52 | 91 |
| 53 if not r.ok: | 92 if not r.ok: |
| 54 r.raise_for_status() | 93 r.raise_for_status() |
| 55 | 94 |
| 56 print(r.text) | 95 print(r.text) |
