Mercurial > repos > earlhaminst > ensembl_get_genetree
changeset 8:935de83b470b draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
author | earlhaminst |
---|---|
date | Mon, 17 Feb 2025 14:49:15 +0000 |
parents | 515e7181d5e9 |
children | |
files | get_feature_info.py get_genetree.py get_genetree.xml get_sequences.py macros.xml |
diffstat | 5 files changed, 195 insertions(+), 96 deletions(-) |
line wrap: on
line diff
--- a/get_feature_info.py Mon Dec 05 16:29:26 2022 +0000 +++ b/get_feature_info.py Mon Feb 17 14:49:15 2025 +0000 @@ -1,37 +1,45 @@ # A simple tool to connect to the Ensembl server and retrieve feature # information using the Ensembl REST API. -from __future__ import print_function - import json import optparse from itertools import islice +from urllib.parse import urljoin import requests -from six.moves.urllib.parse import urljoin parser = optparse.OptionParser() -parser.add_option('-i', '--input', help='List of Ensembl IDs') -parser.add_option('-e', '--expand', type='choice', choices=['0', '1'], - default='0', - help='Expands the search to include any connected features. e.g. If the object is a gene, its transcripts, translations and exons will be returned as well.') +parser.add_option("-i", "--input", help="List of Ensembl IDs") +parser.add_option( + "-e", + "--expand", + type="choice", + choices=["0", "1"], + default="0", + help="Expands the search to include any connected features. e.g. 
If the object is a gene, its transcripts, translations and exons will be returned as well.", +) -parser.add_option('-f', '--format', type='choice', - choices=['full', 'condensed'], default='full', - help='Specify the formats to emit from this endpoint') +parser.add_option( + "-f", + "--format", + type="choice", + choices=["full", "condensed"], + default="full", + help="Specify the formats to emit from this endpoint", +) options, args = parser.parse_args() if options.input is None: - raise Exception('-i option must be specified') + raise Exception("-i option must be specified") -server = 'https://rest.ensembl.org' -ext = 'lookup/id' +server = "https://rest.ensembl.org" +ext = "lookup/id" -headers = {'Content-Type': 'application/json', 'Accept': 'application/json'} -params = dict((k, getattr(options, k)) for k in ['format', 'expand']) +headers = {"Content-Type": "application/json", "Accept": "application/json"} +params = {k: getattr(options, k) for k in ("format", "expand")} first = True -print('{') +print("{") with open(options.input) as f: while True: @@ -40,9 +48,14 @@ break if not first: print(",") - data = {'ids': ids} - r = requests.post(urljoin(server, ext), params=params, headers=headers, - data=json.dumps(data), allow_redirects=False) + data = {"ids": ids} + r = requests.post( + urljoin(server, ext), + params=params, + headers=headers, + data=json.dumps(data), + allow_redirects=False, + ) if not r.ok: r.raise_for_status() @@ -51,4 +64,4 @@ first = False -print('}') +print("}")
--- a/get_genetree.py Mon Dec 05 16:29:26 2022 +0000 +++ b/get_genetree.py Mon Feb 17 14:49:15 2025 +0000 @@ -1,54 +1,93 @@ # A simple tool to connect to the Ensembl server and retrieve genetree using # the Ensembl REST API. -from __future__ import print_function - import optparse +from urllib.parse import urljoin import requests -from six.moves.urllib.parse import urljoin parser = optparse.OptionParser() -parser.add_option('--id_type', type='choice', default='gene_id', - choices=['gene_id', 'gene_tree_id'], help='Input type') -parser.add_option('-i', '--input', help='Ensembl ID') -parser.add_option('--format', type='choice', - choices=['json', 'orthoxml', 'phyloxml', 'nh'], - default='json', help='Output format') -parser.add_option('-s', '--sequence', type='choice', - choices=['protein', 'cdna', 'none'], default='protein', - help='The type of sequence to bring back. Setting it to none results in no sequence being returned') +parser.add_option( + "--id_type", + type="choice", + default="gene_id", + choices=["gene_id", "gene_tree_id"], + help="Input type", +) +parser.add_option("--species", help="Species name/alias") +parser.add_option("-i", "--input", help="Ensembl ID") +parser.add_option( + "--format", + type="choice", + choices=["json", "orthoxml", "phyloxml", "nh"], + default="json", + help="Output format", +) +parser.add_option( + "-s", + "--sequence", + type="choice", + choices=["protein", "cdna", "none"], + default="protein", + help="The type of sequence to bring back. Setting it to none results in no sequence being returned", +) -parser.add_option('-a', '--aligned', type='choice', choices=['0', '1'], - default='0', help='Return the aligned string if true. 
Otherwise, return the original sequence (no insertions)') -parser.add_option('-c', '--cigar_line', type='choice', choices=['0', '1'], - default='0', - help='Return the aligned sequence encoded in CIGAR format') -parser.add_option('--nh_format', type='choice', - choices=['full', 'display_label_composite', 'simple', 'species', 'species_short_name', 'ncbi_taxon', 'ncbi_name', 'njtree', 'phylip'], - default='simple', - help='The format of a NH (New Hampshire) request') +parser.add_option( + "-a", + "--aligned", + type="choice", + choices=["0", "1"], + default="0", + help="Return the aligned string if true. Otherwise, return the original sequence (no insertions)", +) +parser.add_option( + "-c", + "--cigar_line", + type="choice", + choices=["0", "1"], + default="0", + help="Return the aligned sequence encoded in CIGAR format", +) +parser.add_option( + "--nh_format", + type="choice", + choices=[ + "full", + "display_label_composite", + "simple", + "species", + "species_short_name", + "ncbi_taxon", + "ncbi_name", + "njtree", + "phylip", + ], + default="simple", + help="The format of a NH (New Hampshire) request", +) options, args = parser.parse_args() if options.input is None: - raise Exception('-i option must be specified') + raise Exception("-i option must be specified") -server = 'https://rest.ensembl.org' +server = "https://rest.ensembl.org" -if options.id_type == 'gene_id': - ext = 'genetree/member/id' -elif options.id_type == 'gene_tree_id': - ext = 'genetree/id' +if options.id_type == "gene_id": + ext = f"genetree/member/id/{options.species}/{options.input}" +elif options.id_type == "gene_tree_id": + ext = f"genetree/id/{options.input}" -if options.format == 'json': - content_type = 'application/json' -elif options.format == 'orthoxml': - content_type = 'text/x-orthoxml+xml' -elif options.format == 'phyloxml': - content_type = 'text/x-phyloxml+xml' -elif options.format == 'nh': - content_type = 'text/x-nh' -headers = {'Content-Type': content_type} -params = dict((k, 
getattr(options, k)) for k in ['sequence', 'aligned', 'cigar_line', 'nh_format']) -r = requests.get(urljoin(server, '/'.join([ext, options.input])), params=params, headers=headers) +if options.format == "json": + content_type = "application/json" +elif options.format == "orthoxml": + content_type = "text/x-orthoxml+xml" +elif options.format == "phyloxml": + content_type = "text/x-phyloxml+xml" +elif options.format == "nh": + content_type = "text/x-nh" +headers = {"Content-Type": content_type} +params = { + k: getattr(options, k) for k in ("sequence", "aligned", "cigar_line", "nh_format") +} +r = requests.get(urljoin(server, ext), params=params, headers=headers) if not r.ok: r.raise_for_status()
--- a/get_genetree.xml Mon Dec 05 16:29:26 2022 +0000 +++ b/get_genetree.xml Mon Feb 17 14:49:15 2025 +0000 @@ -1,6 +1,7 @@ -<tool id="get_genetree" name="Get gene tree by Ensembl ID" version="0.1.2"> +<tool id="get_genetree" name="Get gene tree by Ensembl ID" version="1.0.0"> <description>using REST API</description> <macros> + <import>macros.xml</import> <xml name="sequence_conditional"> <conditional name="sequence"> <param name="sequence_selector" type="select" label="Sequence type" help="The type of sequences to bring back. If 'None', no sequence is returned"> @@ -21,13 +22,16 @@ </xml> </macros> <requirements> - <requirement type="package" version="2.12.4">requests</requirement> - <requirement type="package" version="1.10.0">six</requirement> + <requirement type="package" version="3.13">python</requirement> + <requirement type="package" version="2.32.3">requests</requirement> </requirements> <command detect_errors="exit_code"> <![CDATA[ -python '$__tool_directory__/get_genetree.py' ---id_type $input_type +python3 '$__tool_directory__/get_genetree.py' +--id_type ${input_type_conditional.input_type} +#if $input_type_conditional.input_type == "gene_id" + --species ${input_type_conditional.species} +#end if -i '$input' --format ${output_format.output_format_selector} #if $output_format.output_format_selector == 'json' @@ -49,10 +53,18 @@ </command> <inputs> - <param name="input_type" type="select" label="Input type"> - <option value="gene_id" selected="true">Ensembl gene ID</option> - <option value="gene_tree_id">Ensembl genetree ID</option> - </param> + <conditional name="input_type_conditional"> + <param name="input_type" type="select" label="Input type"> + <option value="gene_id" selected="true">Ensembl gene ID</option> + <option value="gene_tree_id">Ensembl genetree ID</option> + </param> + <when value="gene_id"> + <param name="species" type="text" label="Species name/alias"> + <validator type="empty_field" /> + </param> + </when> + <when value="gene_tree_id" 
/> + </conditional> <param name="input" type="text" label="Ensembl ID"> <validator type="empty_field" /> </param> @@ -98,7 +110,10 @@ <tests> <test> - <param name="input_type" value="gene_id" /> + <conditional name="input_type_conditional"> + <param name="input_type" value="gene_id" /> + <param name="species" value="Chicken" /> + </conditional> <param name="input" value="ENSGALP00010009242" /> <param name="output_format_selector" value="json" /> <param name="sequence_selector" value="protein" /> @@ -114,7 +129,10 @@ </output> </test> <test> - <param name="input_type" value="gene_id" /> + <conditional name="input_type_conditional"> + <param name="input_type" value="gene_id" /> + <param name="species" value="Chicken" /> + </conditional> <param name="input" value="ENSGALP00010009242" /> <param name="output_format_selector" value="nh" /> <param name="nh_format" value="simple" /> @@ -126,7 +144,9 @@ </output> </test> <test> - <param name="input_type" value="gene_tree_id" /> + <conditional name="input_type_conditional"> + <param name="input_type" value="gene_tree_id" /> + </conditional> <param name="input" value="ENSGT00390000003602" /> <param name="output_format_selector" value="json" /> <param name="sequence_selector" value="protein" /> @@ -142,7 +162,9 @@ </output> </test> <test> - <param name="input_type" value="gene_tree_id" /> + <conditional name="input_type_conditional"> + <param name="input_type" value="gene_tree_id" /> + </conditional> <param name="input" value="ENSGT00390000003602" /> <param name="output_format_selector" value="phyloxml" /> <param name="sequence_selector" value="protein" /> @@ -164,12 +186,11 @@ Retrieve a gene tree from Ensembl using its REST API. -Uses the `"GET genetree/id"`_ and `"GET genetree/member/id"`_ API endpoint. +Uses the `"GET genetree/id/:id"`_ and `"GET genetree/member/id/:species/:id"`_ API endpoints. -.. _"GET genetree/id": https://rest.ensembl.org/documentation/info/genetree -.. 
_"GET genetree/member/id": https://rest.ensembl.org/documentation/info/genetree_member_id +.. _"GET genetree/id/:id": https://rest.ensembl.org/documentation/info/genetree +.. _"GET genetree/member/id/:species/:id": https://rest.ensembl.org/documentation/info/genetree_species_member_id ]]> </help> - <citations> - </citations> + <expand macro="citations" /> </tool>
--- a/get_sequences.py Mon Dec 05 16:29:26 2022 +0000 +++ b/get_sequences.py Mon Feb 17 14:49:15 2025 +0000 @@ -1,42 +1,60 @@ # A simple tool to connect to the Ensembl server and retrieve sequences using # the Ensembl REST API. -from __future__ import print_function - import json import optparse from itertools import islice +from urllib.parse import urljoin import requests -from six.moves.urllib.parse import urljoin parser = optparse.OptionParser() -parser.add_option('-i', '--input', help='List of Ensembl IDs') +parser.add_option("-i", "--input", help="List of Ensembl IDs") -parser.add_option('-t', '--type', type='choice', - choices=['genomic', 'cds', 'cdna', 'protein'], - default='genomic', help='Type of sequence') -parser.add_option('--expand_3prime', type='int', default=0, - help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type') -parser.add_option('--expand_5prime', type='int', default=0, - help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type') +parser.add_option( + "-t", + "--type", + type="choice", + choices=["genomic", "cds", "cdna", "protein"], + default="genomic", + help="Type of sequence", +) +parser.add_option( + "--expand_3prime", + type="int", + default=0, + help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type", +) +parser.add_option( + "--expand_5prime", + type="int", + default=0, + help="Expand the sequence upstream of the sequence by this many basepairs. 
Only available when using genomic sequence type", +) options, args = parser.parse_args() if options.input is None: - raise Exception('-i option must be specified') + raise Exception("-i option must be specified") + +server = "https://rest.ensembl.org" +ext = "sequence/id" -server = 'https://rest.ensembl.org' -ext = 'sequence/id' - -headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'} -params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime']) +headers = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"} +params = { + k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime") +} with open(options.input) as f: # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl while True: ids = [line.strip() for line in islice(f, 50)] if not ids: break - data = {'ids': ids} - r = requests.post(urljoin(server, ext), params=params, headers=headers, - data=json.dumps(data), allow_redirects=False) + data = {"ids": ids} + r = requests.post( + urljoin(server, ext), + params=params, + headers=headers, + data=json.dumps(data), + allow_redirects=False, + ) if not r.ok: r.raise_for_status()