Previous changeset 0:76b2c482f1e8 (2016-08-11) Next changeset 2:4b7261f484bb (2016-12-21) |
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty |
modified:
test-data/genetree.json test-data/genetree.phyloxml |
added:
get_feature_info.py get_genetree.py get_sequences.py get_sequences.xml |
removed:
get_feature_info/get_feature_info.py get_genetree/get_genetree.py get_sequences/get_sequences.py get_sequences/get_sequences.xml |
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc get_feature_info.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_feature_info.py Mon Dec 12 07:47:42 2016 -0500 |
[ |
# A simple tool to connect to the Ensembl server and retrieve feature
# information using the Ensembl REST API.
#
# Usage: get_feature_info.py -i IDS_FILE [-e 0|1] [-s ensembl|ensemblgenomes]
#                            [-f full|condensed]
# The JSON returned by the server is written to standard output.
import json
import optparse

try:  # Python 3
    from urllib.parse import urljoin
except ImportError:  # Python 2
    from urlparse import urljoin

# REST endpoint (relative to the server root) that accepts a POSTed ID list.
ENDPOINT = 'lookup/id'


def read_ids(lines):
    """Return the Ensembl IDs found in an iterable of text lines.

    Surrounding whitespace is stripped and blank lines are dropped, so an
    empty row or a trailing newline is never sent to the server as an
    empty ID.
    """
    stripped = (line.strip() for line in lines)
    return [ensembl_id for ensembl_id in stripped if ensembl_id]


def build_url(species):
    """Return the lookup URL on the REST server selected by ``species``.

    ``species`` is the value of the -s option ('ensembl' or
    'ensemblgenomes'); it is substituted into the server host name.
    """
    return urljoin('http://rest.%s.org' % species, ENDPOINT)


def parse_options(argv=None):
    """Parse the command line and return the optparse options object.

    ``argv`` defaults to ``sys.argv[1:]`` when None.

    Raises:
        Exception: if the -i/--input option is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', help='List of Ensembl IDs')
    parser.add_option('-e', '--expand', type='choice', choices=['0', '1'],
                      default='0',
                      help='Expands the search to include any connected features. e.g. If the object is a gene, its transcripts, translations and exons will be returned as well.')
    parser.add_option('-s', '--species', type='choice',
                      choices=['ensembl', 'ensemblgenomes'], default='ensembl',
                      help='Specify the genome databases for vertebrates and other eukaryotic species')
    parser.add_option('-f', '--format', type='choice',
                      choices=['full', 'condensed'], default='full',
                      help='Specify the formats to emit from this endpoint')
    options, _ = parser.parse_args(argv)
    if options.input is None:
        raise Exception('-i option must be specified')
    return options


def main(argv=None):
    """Look up every ID listed in the input file and print the JSON reply.

    Raises:
        Exception: if the input file contains no IDs.
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # Third-party dependency: imported here so that the pure helpers above
    # can be imported (and tested) without it being installed.
    import requests

    options = parse_options(argv)
    with open(options.input) as handle:
        ids = read_ids(handle)
    if not ids:
        raise Exception('No Ensembl IDs found in %s' % options.input)

    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    params = {'format': options.format, 'expand': options.expand}
    response = requests.post(build_url(options.species), params=params,
                             headers=headers, data=json.dumps({'ids': ids}))
    # raise_for_status() is a no-op on success, so no separate .ok check.
    response.raise_for_status()

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(response.text)


if __name__ == '__main__':
    main()
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc get_feature_info/get_feature_info.py --- a/get_feature_info/get_feature_info.py Thu Aug 11 14:29:50 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,41 +0,0 @@ -# A simple tool to connect to the Ensembl server and retrieve feature -# information using the Ensembl REST API. -import json -import optparse -from urlparse import urljoin - -import requests - -parser = optparse.OptionParser() -parser.add_option('-i', '--input', help='List of Ensembl IDs') -parser.add_option('-e', '--expand', type='choice', choices=['0', '1'], - default='0', - help='Expands the search to include any connected features. e.g. If the object is a gene, its transcripts, translations and exons will be returned as well.') - -parser.add_option('-s', '--species', type='choice', - choices=['ensembl', 'ensemblgenomes'], default='ensembl', - help='Specify the genome databases for vertebrates and other eukaryotic species') - -parser.add_option('-f', '--format', type='choice', - choices=['full', 'condensed'], default='full', - help='Specify the formats to emit from this endpoint') -options, args = parser.parse_args() -if options.input is None: - raise Exception('-i option must be specified') - - -server = 'http://rest.%s.org' % options.species -ext = 'lookup/id' - -headers = {'Content-Type': 'application/json', 'Accept': 'application/json'} -params = dict((k, getattr(options, k)) for k in ['format', 'expand']) -with open(options.input) as f: - ids = [line.strip() for line in f] -data = {'ids': ids} -r = requests.post(urljoin(server, ext), params=params, headers=headers, - data=json.dumps(data)) - -if not r.ok: - r.raise_for_status() - -print r.text |
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc get_genetree.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_genetree.py Mon Dec 12 07:47:42 2016 -0500 |
[ |
# A simple tool to connect to the Ensembl server and retrieve genetree using
# the Ensembl REST API.
#
# Usage: get_genetree.py -i ID [--id_type gene_id|gene_tree_id]
#                        [--format json|orthoxml|phyloxml|nh] [...]
# The tree returned by the server is written to standard output.
import optparse

try:  # Python 3
    from urllib.parse import quote, urljoin
except ImportError:  # Python 2
    from urllib import quote
    from urlparse import urljoin

# REST endpoint (relative to the server root) for each --id_type value.
ENDPOINTS = {
    'gene_id': 'genetree/member/id',
    'gene_tree_id': 'genetree/id',
}

# MIME type sent to the server for each --format value.
CONTENT_TYPES = {
    'json': 'application/json',
    'orthoxml': 'text/x-orthoxml+xml',
    'phyloxml': 'text/x-phyloxml+xml',
    'nh': 'text/x-nh',
}

# Options forwarded verbatim to the server as query parameters.
QUERY_OPTIONS = ('sequence', 'aligned', 'cigar_line', 'nh_format')


def build_url(species, id_type, ensembl_id):
    """Return the genetree URL for ``ensembl_id``.

    ``species`` selects the server host ('ensembl' or 'ensemblgenomes') and
    ``id_type`` selects the endpoint (a key of ENDPOINTS). The ID is stripped
    of surrounding whitespace and percent-encoded, so stray characters cannot
    alter the request path.

    Raises:
        KeyError: if ``id_type`` is not a key of ENDPOINTS.
    """
    server = 'http://rest.%s.org' % species
    path = '/'.join([ENDPOINTS[id_type], quote(ensembl_id.strip(), safe='')])
    return urljoin(server, path)


def parse_options(argv=None):
    """Parse the command line and return the optparse options object.

    ``argv`` defaults to ``sys.argv[1:]`` when None.

    Raises:
        Exception: if the -i/--input option is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('--id_type', type='choice', default='gene_id',
                      choices=['gene_id', 'gene_tree_id'], help='Input type')
    parser.add_option('-i', '--input', help='Ensembl ID')
    parser.add_option('--format', type='choice',
                      choices=['json', 'orthoxml', 'phyloxml', 'nh'],
                      default='json', help='Output format')
    parser.add_option('-s', '--sequence', type='choice',
                      choices=['protein', 'cdna', 'none'], default='protein',
                      help='The type of sequence to bring back. Setting it to none results in no sequence being returned')
    parser.add_option('-g', '--species', type='choice',
                      choices=['ensembl', 'ensemblgenomes'], default='ensembl',
                      help='Specify the genome databases for vertebrates and other eukaryotic species')
    parser.add_option('-a', '--aligned', type='choice', choices=['0', '1'],
                      default='0',
                      help='Return the aligned string if true. Otherwise, return the original sequence (no insertions)')
    parser.add_option('-c', '--cigar_line', type='choice', choices=['0', '1'],
                      default='0',
                      help='Return the aligned sequence encoded in CIGAR format')
    parser.add_option('--nh_format', type='choice',
                      choices=['full', 'display_label_composite', 'simple', 'species', 'species_short_name', 'ncbi_taxon', 'ncbi_name', 'njtree', 'phylip'],
                      default='simple',
                      help='The format of a NH (New Hampshire) request')
    options, _ = parser.parse_args(argv)
    if options.input is None:
        raise Exception('-i option must be specified')
    return options


def main(argv=None):
    """Fetch the requested gene tree and print the server's reply.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # Third-party dependency: imported here so that the pure helpers above
    # can be imported (and tested) without it being installed.
    import requests

    options = parse_options(argv)
    # The output format is requested through the Content-Type header, exactly
    # as the original script did.
    headers = {'Content-Type': CONTENT_TYPES[options.format]}
    params = dict((name, getattr(options, name)) for name in QUERY_OPTIONS)
    url = build_url(options.species, options.id_type, options.input)
    response = requests.get(url, params=params, headers=headers)
    # raise_for_status() is a no-op on success, so no separate .ok check.
    response.raise_for_status()

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(response.text)


if __name__ == '__main__':
    main()
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc get_genetree/get_genetree.py --- a/get_genetree/get_genetree.py Thu Aug 11 14:29:50 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,58 +0,0 @@ -# A simple tool to connect to the Ensembl server and retrieve genetree using -# the Ensembl REST API. -import optparse -from urlparse import urljoin - -import requests - -parser = optparse.OptionParser() -parser.add_option('--id_type', type='choice', default='gene_id', - choices=['gene_id', 'gene_tree_id'], help='Input type') -parser.add_option('-i', '--input', help='Ensembl ID') -parser.add_option('--format', type='choice', - choices=['json', 'orthoxml', 'phyloxml', 'nh'], - default='json', help='Output format') -parser.add_option('-s', '--sequence', type='choice', - choices=['protein', 'cdna', 'none'], default='protein', - help='The type of sequence to bring back. Setting it to none results in no sequence being returned') - -parser.add_option('-g', '--species', type='choice', - choices=['ensembl', 'ensemblgenomes'], default='ensembl', - help='Specify the genome databases for vertebrates and other eukaryotic species') - -parser.add_option('-a', '--aligned', type='choice', choices=['0', '1'], - default='0', help='Return the aligned string if true. 
Otherwise, return the original sequence (no insertions)') -parser.add_option('-c', '--cigar_line', type='choice', choices=['0', '1'], - default='0', - help='Return the aligned sequence encoded in CIGAR format') -parser.add_option('--nh_format', type='choice', - choices=['full', 'display_label_composite', 'simple', 'species', 'species_short_name', 'ncbi_taxon', 'ncbi_name', 'njtree', 'phylip'], - default='simple', - help='The format of a NH (New Hampshire) request') -options, args = parser.parse_args() -if options.input is None: - raise Exception('-i option must be specified') - -server = 'http://rest.%s.org' % options.species - -if options.id_type == 'gene_id': - ext = 'genetree/member/id' -elif options.id_type == 'gene_tree_id': - ext = 'genetree/id' - -if options.format == 'json': - content_type = 'application/json' -elif options.format == 'orthoxml': - content_type = 'text/x-orthoxml+xml' -elif options.format == 'phyloxml': - content_type = 'text/x-phyloxml+xml' -elif options.format == 'nh': - content_type = 'text/x-nh' -headers = {'Content-Type': content_type} -params = dict((k, getattr(options, k)) for k in ['sequence', 'aligned', 'cigar_line', 'nh_format']) -r = requests.get(urljoin(server, '/'.join([ext, options.input])), params=params, headers=headers) - -if not r.ok: - r.raise_for_status() - -print r.text |
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc get_sequences.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_sequences.py Mon Dec 12 07:47:42 2016 -0500 |
[ |
# A simple tool to connect to the Ensembl server and retrieve sequences using
# the Ensembl REST API.
#
# Usage: get_sequences.py -i IDS_FILE [-s ensembl|ensemblgenomes]
#                         [-t genomic|cds|cdna|protein]
#                         [--expand_3prime N] [--expand_5prime N]
# The FASTA text returned by the server is written to standard output.
import json
import optparse
from itertools import islice

try:  # Python 3
    from urllib.parse import urljoin
except ImportError:  # Python 2
    from urlparse import urljoin

# REST endpoint (relative to the server root) that accepts a POSTed ID list.
ENDPOINT = 'sequence/id'

# Requests are split in chunks of 50 IDs because of the limit imposed by
# Ensembl.
MAX_IDS_PER_REQUEST = 50

# Options forwarded verbatim to the server as query parameters.
QUERY_OPTIONS = ('type', 'expand_3prime', 'expand_5prime')


def iter_id_chunks(lines, size=MAX_IDS_PER_REQUEST):
    """Yield lists of at most ``size`` Ensembl IDs read from ``lines``.

    Surrounding whitespace is stripped and blank lines are skipped *before*
    chunking. Empty IDs are therefore never sent to the server, and a run of
    blank lines neither produces a request of its own nor ends the iteration
    early.
    """
    stripped = (line.strip() for line in lines)
    ids = (ensembl_id for ensembl_id in stripped if ensembl_id)
    while True:
        chunk = list(islice(ids, size))
        if not chunk:
            return
        yield chunk


def build_url(species):
    """Return the sequence URL on the REST server selected by ``species``.

    ``species`` is the value of the -s option ('ensembl' or
    'ensemblgenomes'); it is substituted into the server host name.
    """
    return urljoin('http://rest.%s.org' % species, ENDPOINT)


def parse_options(argv=None):
    """Parse the command line and return the optparse options object.

    ``argv`` defaults to ``sys.argv[1:]`` when None.

    Raises:
        Exception: if the -i/--input option is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', help='List of Ensembl IDs')
    parser.add_option('-s', '--species', type='choice',
                      choices=['ensembl', 'ensemblgenomes'], default='ensembl',
                      help='Specify the genome databases for vertebrates and other eukaryotic species')
    parser.add_option('-t', '--type', type='choice',
                      choices=['genomic', 'cds', 'cdna', 'protein'],
                      default='genomic', help='Type of sequence')
    parser.add_option('--expand_3prime', type='int', default=0,
                      help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
    parser.add_option('--expand_5prime', type='int', default=0,
                      help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
    options, _ = parser.parse_args(argv)
    if options.input is None:
        raise Exception('-i option must be specified')
    return options


def main(argv=None):
    """Fetch the sequences of all IDs in the input file, chunk by chunk.

    Each chunk's reply is printed as soon as it arrives.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # Third-party dependency: imported here so that the pure helpers above
    # can be imported (and tested) without it being installed.
    import requests

    options = parse_options(argv)
    url = build_url(options.species)
    # NOTE(review): the body is JSON although Content-Type says FASTA. The
    # headers are left exactly as before because the server's content
    # negotiation order could not be confirmed here -- verify before changing.
    headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
    params = dict((name, getattr(options, name)) for name in QUERY_OPTIONS)

    with open(options.input) as handle:
        for ids in iter_id_chunks(handle):
            response = requests.post(url, params=params, headers=headers,
                                     data=json.dumps({'ids': ids}))
            # raise_for_status() is a no-op on success.
            response.raise_for_status()

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(response.text)


if __name__ == '__main__':
    main()
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc get_sequences.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_sequences.xml Mon Dec 12 07:47:42 2016 -0500 |
[ |
<tool id="get_sequences" name="Get sequences by Ensembl ID" version="0.1.1">
    <!-- Galaxy wrapper around get_sequences.py: posts a list of Ensembl IDs
         to the REST API and stores the returned FASTA text as the output
         dataset. -->
    <description>using REST API</description>
    <requirements>
        <requirement type="package" version="2.7">requests</requirement>
    </requirements>
    <command>
<![CDATA[
python '$__tool_directory__/get_sequences.py'
-s $species_selector
--expand_3prime $expand_3prime
--expand_5prime $expand_5prime
-t $type_selector
-i "$input"
> "$output"
]]>
    </command>

    <inputs>
        <param name="input" type="data" format="txt" label="List of Ensembl IDs" />
        <param name="species_selector" type="select" label="Select Species">
            <option value="ensembl" selected="true">Vertebrates</option>
            <option value="ensemblgenomes">Other species</option>
        </param>
        <param name="expand_3prime" type="integer" value="0" min="0" label="expand_3prime" help="Expand each sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type." />
        <!-- The 5' end is upstream of the sequence (matches the help text of get_sequences.py). -->
        <param name="expand_5prime" type="integer" value="0" min="0" label="expand_5prime" help="Expand each sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type." />
        <param name="type_selector" type="select" label="Type" help="Type of sequence. Defaults to genomic where applicable, i.e. not translations. cDNA refers to the spliced transcript sequence with UTR; CDS refers to the spliced transcript sequence without UTR">
            <option value="genomic" selected="true">Genomic</option>
            <option value="cds">CDS</option>
            <option value="cdna">cDNA</option>
            <option value="protein">Protein</option>
        </param>
    </inputs>

    <outputs>
        <data name="output" format="fasta" label="$(tool.name) on ${on_string}" />
    </outputs>

    <tests>
        <test>
            <param name="input" ftype="txt" value="input.txt" />
            <param name="expand_3prime" value="0" />
            <param name="expand_5prime" value="0" />
            <param name="type_selector" value="genomic" />
            <output name="output" file="sequences.fasta" />
        </test>
    </tests>

    <help>
<![CDATA[
**What it does**

Retrieves FASTA sequences from Ensembl using its REST API.

Uses the `"POST sequence/id"`_ API endpoint.

.. _"POST sequence/id": http://rest.ensembl.org/documentation/info/sequence_id_post
]]>
    </help>
    <citations>
    </citations>
</tool>
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc get_sequences/get_sequences.py --- a/get_sequences/get_sequences.py Thu Aug 11 14:29:50 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,46 +0,0 @@ -# A simple tool to connect to the Ensembl server and retrieve sequences using -# the Ensembl REST API. -import json -import optparse -from itertools import islice -from urlparse import urljoin - -import requests - -parser = optparse.OptionParser() -parser.add_option('-i', '--input', help='List of Ensembl IDs') - -parser.add_option('-s', '--species', type='choice', - choices=['ensembl', 'ensemblgenomes'], default='ensembl', - help='Specify the genome databases for vertebrates and other eukaryotic species') - -parser.add_option('-t', '--type', type='choice', - choices=['genomic', 'cds', 'cdna', 'protein'], - default='genomic', help='Type of sequence') -parser.add_option('--expand_3prime', type='int', default=0, - help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type') -parser.add_option('--expand_5prime', type='int', default=0, - help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type') -options, args = parser.parse_args() -if options.input is None: - raise Exception('-i option must be specified') - -server = 'http://rest.%s.org' % options.species -ext = 'sequence/id' - -headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'} -params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime']) -with open(options.input) as f: - # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl - while True: - ids = [line.strip() for line in islice(f, 50)] - if not ids: - break - data = {'ids': ids} - r = requests.post(urljoin(server, ext), params=params, headers=headers, - data=json.dumps(data)) - - if not r.ok: - r.raise_for_status() - - print r.text |
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc get_sequences/get_sequences.xml --- a/get_sequences/get_sequences.xml Thu Aug 11 14:29:50 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,61 +0,0 @@ -<tool id="get_sequences" name="Get sequences by Ensembl ID" version="0.1.1"> - <description>using REST API</description> - <requirements> - <requirement type="package" version="2.7">requests</requirement> - </requirements> - <command> -<![CDATA[ -python $__tool_directory__/get_sequences.py --s $species_selector ---expand_3prime $expand_3prime ---expand_5prime $expand_5prime --t $type_selector --i "$input" -> "$output" -]]> - </command> - - <inputs> - <param name="input" type="data" format="txt" label="List of Ensembl IDs" /> - <param name="species_selector" type="select" label="Select Species"> - <option value="ensembl" selected="true">Vertebrates</option> - <option value="ensemblgenomes">Other species</option> - </param> - <param name="expand_3prime" type="integer" value="0" min="0" label="expand_3prime" help="Expand each sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type." /> - <param name="expand_5prime" type="integer" value="0" min="0" label="expand_5prime" help="Expand each sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type." /> - <param name="type_selector" type="select" label="Type" help="Type of sequence. Defaults to genomic where applicable, i.e. not translations. 
cDNA refers to the spliced transcript sequence with UTR; CDS refers to the spliced transcript sequence without UTR"> - <option value="genomic" selected="true">Genomic</option> - <option value="cds">CDS</option> - <option value="cdna">cDNA</option> - <option value="protein">Protein</option> - </param> - </inputs> - - <outputs> - <data name="output" format="fasta" label="$(tool.name) on ${on_string}" /> - </outputs> - - <tests> - <test> - <param name="input" ftype="txt" value="input.txt" /> - <param name="expand_3prime" value="0" /> - <param name="expand_5prime" value="0" /> - <param name="type_selector" value="genomic" /> - <output name="output" file="sequences.fasta" /> - </test> - </tests> - - <help> -<![CDATA[ -**What it does** - -Retrieves FASTA sequences from Ensembl using its REST API. - -Uses the `"POST sequence/id"`_ API endpoint. - -.. _"POST sequence/id": http://rest.ensembl.org/documentation/info/sequence_id_post -]]> - </help> - <citations> - </citations> -</tool> |
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc test-data/genetree.json --- a/test-data/genetree.json Thu Aug 11 14:29:50 2016 -0400 +++ b/test-data/genetree.json Mon Dec 12 07:47:42 2016 -0500 |
[ |
b'@@ -1,1 +1,1 @@\n-{"tree":{"events":{"type":"speciation"},"branch_length":0,"children":[{"events":{"type":"speciation"},"branch_length":0.153275,"children":[{"events":{"type":"speciation"},"branch_length":0.155187,"children":[{"events":{"type":"speciation"},"branch_length":0.133192,"children":[{"events":{"type":"speciation"},"branch_length":0.201095,"children":[{"events":{"type":"speciation"},"branch_length":0.015782,"children":[{"events":{"type":"speciation"},"branch_length":0.217419,"children":[{"sequence":{"mol_seq":{"seq":"QLARDMQDMRIRKKKRQTIRPLPGSLFQKKSSGVARIPFKAAVNGKPPARYTAKPLCGLGVPLNVLEITSETAESFRFSLQHFVKLESLIDKGGIQLADGGWLIPTNDGTAGKEEFYRALCDTPGVDPKLMSEEWVYNHYRWIVWKQASMERSFPEEMGSLCLTPEQVLLQLKYRYDIEVDHSRRPALRKIMEKDDTAAKTLVLCVCGVVFRGSSPKNKSFGDISTPGADPKVENPCAVVWLTDGWYSIKAQLDGPLTSMLHRGRLPVGGKLIIHGAQLVGSENACSPLEAPVSLMLKICANSSRPARWDSKLGFHRDPRPFLLPVSSLYSSGGPVGCVDIIILRSYPILWMERKPEGGTVFRSGRAEEKEARRYNIHKEKAMEILFDKIKAEFEKEEKGNRKPQCRRTINGQNITSLQDGEELYEAVGDDPAFLEAHLTEKQVEVLQNYKRLVMEKQQAELQDRYRRAVESAEDGVGGCPKRDVAPVWRLCIADSMGHSGRVYQLSLWRPPSELQALLKEGCRYKVYNLTTLDSKKQGGNATVQLTATKKTQFEHLQGSEEWLSKHFQPRVATNFVRLQDPEFNPLCSEVDLTGYVITIIDGQGFSPAFYLADGKQNFVKVRCFSSFAQSGLEDVIKPRVLLALSNLQLRGQSTSPTPVVYAGDLTVFSTNPKEVHLQESFSQLKTLVQGQENFFVHAEEKLSQLMSDGLSAIASPAGQIQTPASTVKRRGDMTDVSSNIMVINKTSKVTCQQPGRSHRFSTPINRNSTAHSSAERNPSTIKKRKALDYLSHIPSPPPLSCLSTLSSPSVKKIFIPPRRTEIPGTLKTVKTPNQKPSNTPVDDQWVNDEELAMIDTQAL","is_aligned":0},"location":"scaffold_19:196046-199577","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSTRUP00000015030"}]},"branch_length":0.072273,"id":{"source":"EnsEMBL","accession":"ENSTRUG00000006177"},"confidence":{},"taxonomy":{"scientific_name":"Takifugu 
rubripes","id":31033}},{"sequence":{"mol_seq":{"seq":"VSFSSDTPRKPKAGSLSSEFTDRFLAQEALDCTKALLEDERLVDDPHMTGECLHRCPQFSLLVNLFVKPHTAVLIPEQPPLKRRLLEEFDRTDGSSRGSALNPEKCSPNGIMGDRRVFKCSVSFQPNITTPHRICSQKAERPVSFLSRRSGTNYVETSLPNTTPTKVSALRDSNEARLQKSNFIPPFIKNVKLDTPNSKTASTFVPPFKKSRNSSKTEEEEPKHHFIPPFTNPCATSSTKKHTAGHLHNVELARDMQGMRIRKKKRQTILPLPGSLFLKKSSGVTRIPLKSAVNGKPPARYTPKQLYGLGVPLNVLEITSETAGSFRFSLQQFVKLESLTDKGGIQLADGGWLIPRNDGTAGKEEFYRALCDTTGVDPKLISEEWVYNHYRWIVWKQASMERSFPEQLGSLCLTPEQVLLQLKYRYDIEVDQSRRPALRKIMERDDTAAKTLILCVCGVVSRGSSPQKQGLGGVAAPSSDPQVENPFAVVWLTDGWYSIKAQLDGPLTSMLNRGRLPVGGKLIIHGAQLVGSQDACSPLEAPESIMLKIFANSSRRARWDAKLGFYRDPRPFLLPVSSLYNSGGPVGCVDIIILRSYPTLWMERKPEGGTVFRSGRAEEKEARRYNVHKEKAMEILFDKIQAEFEKEERDNRKPRSRRRTIGDQDIKSLQDGEELYEAVGDDPAYLEAHLTEQQAETLQNYKRLLIEKKQAELQDRYRRAVETAEDGTGSCPKRDVAPVWRLSIADFMEKPGSVYQLNIWRPPSELQSLLKEGCRYKVYNLTTTDSKKQGGNTTVQLSGTKKTQFEDLQASEELLSTYFQPRVSATFIDLQDPEFHSLCGEVDLTGYVISIIDGQGFSPAFYLTDGKQNFVKVRCFSSFAQSGLEDVIKPSVLLALSNLQLRGQATSPTPVLYAGDLTVFSTNPKEVHLQESFSQLKTLVQ","is_aligned":0},"location":"16:4700614-4705074","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSTNIP00000002435"}]},"branch_length":0.113355,"id":{"source":"EnsEMBL","accession":"ENSTNIG00000016261"},"confidence":{},"taxonomy":{"scientific_name":"Tetraodon 
nigroviridis","id":99883}}],"confidence":{"bootstrap":100},"taxonomy":{"scientific_name":"Tetraodontidae","id":31031}},{"sequence":{"mol_seq":{"seq":"LPNVELAQDMQDMRIRKKKRQTIRPLPGSLFLTKTSGVTRIPLKAALVFLLQLYRHGVHQHVCEISSETAESFRFNLKQFIKREALLDGGGVQLADGGWLIPSKDGTAGKEEFYRALCDTPGVDPKLISDGWVDNHYRWVVWKQASMERSFPETMGGLCLTPEQVLLQLKYRYDVEVDHSRRPALRRITERDDTAAKTLVLCVCGVVSRSFDDSKTPRGADAGGGNPSAVVWLTDGWYAIRAQLDEPLTAMLRNGRVAVGSKLIVHGAQLVGSQEACSPLEAPEALMLKICANSSRPVRWDAKLGFHKDPRPFLLPLSCLYSSGGQVGCVDMIVLRSYPIQWMERKPEGGVVFRSVRAEEKEAKRFNGLKQKAMEILFAKIQDEFEKEDKGRTCDFTTQAISRQAIAGLQAGEELCEAVGEDPAHLEALLSEQQVETLNTYRRCVMEKKQAQLHDRFQRALESAEASEGSCPKREVTPVWRLGVADSRDQRGRVYQLNLWRPSSDLQALLKEGRRYKVYNLTTSDGKKHNGSSNVQLTGTKKTQFQDLQASREWLSTRFQPRVSACFVDLQNPEFQSLCGEVDLTGFVIQIVDGQGFSPAFYLADGELNFVKVRCFSSFAQSGLEDLVKPRVLLSLSNLQLRGQSASPTPVVYAGDLTVFSANPKDAHLQESLSQNKNLRQSQENFFLIAEETLSRLVQSDGRRPLSSPALHTRTPALATSMIQDTTASVKCVLLMQGASQQLVRSRGTFTPVSRKPPAANCSTEKDAGSVKRRRALNYLSHIPSPPPLLNLGSVASPCVNKTFNPPRRSGTPSTLKTVQTPAHKAQKVDSLVEDEWVNDEELAMIDTQAL","is_aligned":0},"location":"groupI:13362884-13366744","name'..b'EDISTSRNALEIRPELYPEGMCSNRASNGSGNNSEFTAGEGISININQSSLLTTGNVLKNLPSESSGHDVYSVTEHLSTVVKVKRYNDSGHFVNQNLAECNDNHVLSTQKNTANISNRNEDCTSLAPLSFSTASGKSVTVSHDSLQKARLMLSEAANDVTVDTSKQEAAYITPAIRKTEAEKEQNTVDDSDRVNANTFSFSTASGKKVNISGNSLKQVRAVCLSSDPKETSAALFNVEKSVFNEDVKDVSLLQPNVTMPKAVSFSTASGKTVQLSDESLKKARVIFSEIDTCPLMQQQTNESTVEEIVIGGGMTKSKQMPLTTEKVETTRKNNGTFGFNTASGKQVSVSESALQKVKDIFQEFDDPDNYEQNKSLVRLPVSSKIKESTPGTKRLVQTAGSSYKNDNLQCKAGNLRTFQDKQAGKKSLTYSEAAISPIESSVPIYEMQVMLKHTNNQACKYQPRVEVPLQDQRWQNILEIELPATCAPAFRETHNILFFGDLQHSTHFDICSLYSGKNPAVKHQLASHSKMQTLVISGRDSSGTLTLQFTLRIVILHTVNNQYSLNKQLFTFSSALRQVTCIPTQAHLHSKVKIFHQSLPIKSPDVASDSTSKSYSPTAAKETINCSSASKIPAKKFVPPFKKTVATLADNQSNSVQNGSSDGLIESIVYPKEDKVETICSSKDQFDDSDILQMTSNLRCSKDLQEMRIRKKLRQKIKPHPGSLYRLKMSHVKRISLQSAVAERCPTLYSREQLYRYGIVKNHIGVSSENALSFQFHCSNYFTKELLLSGNGVQLADGGWLIPTEQGNAGKEEIYRAFCDTPGVDPKLISAEWVHNHYRWIVWKLAAMEVRFPKTFACRCLTPERVLLQLKYRYDVEIDKSQRSAIKKIMERDDSPAKTLVLCIAKIISQGTRLPNACSNKTEPADSKESSAVI
EVTDSWYGIKVLLDPCLTALLHKGRLFIGQKLIVHGAELIGSDDACSPLEAPESLMLKIAANSTRPVRWHTKLGYFKDPRPFCLHLSSLLSEGGVVGCVDVVIQRIYPMQWMEKMANGLYVFRNDRAEEREAEKHSANQQKKLEMLFSKIQAEFEQREVTCNRRKGLRRRSLNAQQMQTLQDGAEIYEAIQNESDPGYLESYLSAEQLKALNHHRQLLNDKKQALIQAEFRKAIECSEQDANGCTRRDVTPVWKLRIADYRNYETDAAYILNIWRPLPDVLSLLKEGCRYKMYHLAASTSKGKSLAADLQLTATKKTRFQQLQLSESILEQIYSPREVTDFSRFQEPLFSAPYAEVDLVGLIISIYKKTGAAPVVYISDESHNIVALKFWTDLGQLGLEEITKPRTYISASNLRWRSDCIEGIPTLYVGDLANISSNPKESHLQRAIQKLKLSVQNVQDFWNSSQTALMKTLQINSTDTTECSKNPTTPTWKSDVSARSGYLTPLHHSGKRLLNSVHTSDPQTENPGCSKEIQLKTCKKRKALDFLNRIPSPPPVTPVRPFVSPSLQKAFRPPRSCSVQKLGPETKGNTENVQGTTPECTKDLAKLEGEFVADEELAMINTQALLLGLEEEKKKTEQKTSRTAGKMTAHESPIENASPVPAQEQQTEEALNIPVGNSEKSYLCLRKRKRK","is_aligned":0},"location":"GL172716.1:1071058-1096238","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSXETP00000060681"}]},"branch_length":0.756151,"id":{"source":"EnsEMBL","accession":"ENSXETG00000017011"},"confidence":{},"taxonomy":{"scientific_name":"Xenopus tropicalis","common_name":"X.tropicalis","id":8364}}],"confidence":{"bootstrap":1},"taxonomy":{"scientific_name":"Tetrapoda","common_name":"Tetrapods","timetree_mya":371.2,"id":32523}},{"sequence":{"mol_seq":{"seq":"MEWEAVESVKALMRDDELTDAGLDASKDSLNRACRRQSGGNFRARKRMRLEQVSADEPPVKRQLLAEFDRTVENGHKSLQKPLICTPNGTLKDRRKFMYSVPLKPVVCGPWSNNSKTGQQVTKPSITLPGRGVETFQPKNHIAPSPVYDPPSNRRGPVFAPPFHGATFRGLQKPSASHTSSKTAKTFVPPFKMKASASHTVHFSSKVINTCEKILENLVYLKPSLASCNIFQSLEEMTANLQCARDLQEMRLRKKQRQNIRPQPGSLYLAKTSGVARVSLKAATGNQCPSSYSTEQLYVHGVGKSTLKVRSENAESFQFSCSDYFGKDVLLAGNGLKLADGGWLIPSDKGMVGKEEFYRALCDTPGVAPKLISESWVYNHYRWIVWKLAAMEAAFPKEFGNRCLTPERVLLQLKYRYDIEVDKCRRSTVKKIMERDDTAAKTLVLCISKLISVEDRFKQTKNKNEKGAEEARKEAVAGVIETTDGWYGIKVLLDPPLTVLVQRGRLSVGCKIITHGAEIIGSQDACTPLEAPECLMLKISANSTRPACWSAKLGFHRDPRPFPLPLASLFNDGGLVGCVDVVVVRLYPIQWMEKKSDGIFVFRNDRAEEREAQRQVENQQRKMESLFAKIQTEFEQKYEAKSKRRGQKAQKFSKQEIQALQDGAELNEAIENSMDPGYFEACLREEQLKVLHGHRQMLNEKKQAEFQAEFKKALESAEQEGKSCCKRGVTPVWKLRIVDYRKPSAAEYILNIWRPLADLHSLLKEGNRYRIYQLLASQSKGRTTTADIQLTATKKTQYQQFQSFPELISELYSPRK
AVKFNMLMDPTFRPAYAEVDLVGYTISIEGKPGVAPVVYLSDESHNFVAIKVWTALNQLAVEDIVKPFSLIAASNLQWRSDSRSIIPMLYAGDLSIFSSNPKEGHLQEAFNQRRTAIQENISGTYLPPEKKNLHQESYKSCQYNTLNVLMNGNIHTQSPVLSRVHMGTSCAFLFLLPSPYPESKHTSPLITMKAGVKSMTFPGSAKLMPQASENQELDTPKNRKKKAALDYLCRIPSPPALTPIRSFVSSSLQKAFHPPRSCVKLQSGENPVVPTVGNNAVLGIQSKKDEGPAAFNEEDSVADEELAMINTQAFLVGLRRDKRPSLLDKTASLKGHVPSERFLEEKLLSVLKEQASSNSERNATSLENKSCDKSRTCVKPCEHSNDSIAEETSEIIPGCHGGESAVENQSKNSSLCHKKLQQKKRRKYY","is_aligned":0},"location":"JH127744.1:299190-332700","id":[{"source":"EnsEMBL","accession":"ENSLACP00000008815"}]},"branch_length":0.314542,"id":{"source":"EnsEMBL","accession":"ENSLACG00000007788"},"confidence":{},"taxonomy":{"scientific_name":"Latimeria chalumnae","common_name":"Coelacanth","id":7897}}],"confidence":{"bootstrap":1},"taxonomy":{"scientific_name":"Sarcopterygii","common_name":"Lobe-finned fish","timetree_mya":414.9,"id":8287}}],"confidence":{},"taxonomy":{"scientific_name":"Euteleostomi","common_name":"Bony vertebrates","timetree_mya":441,"id":117571}},"rooted":1,"id":"ENSGT00390000003602","type":"gene tree"}\n' |
b |
diff -r 76b2c482f1e8 -r e5dd4bd78bbc test-data/genetree.phyloxml --- a/test-data/genetree.phyloxml Thu Aug 11 14:29:50 2016 -0400 +++ b/test-data/genetree.phyloxml Mon Dec 12 07:47:42 2016 -0500 |
b |
b'@@ -4,171 +4,52 @@\n <phylogeny rooted="true" type="gene tree">\n <clade branch_length="0">\n <taxonomy>\n+ <scientific_name>Euteleostomi</scientific_name>\n <id>117571</id>\n- <scientific_name>Euteleostomi</scientific_name>\n+ <common_name>Bony vertebrates</common_name>\n </taxonomy>\n- <clade branch_length="0.153275">\n- <confidence type="bootstrap">95</confidence>\n+ <clade branch_length="0.149761">\n+ <confidence type="bootstrap">92</confidence>\n <taxonomy>\n+ <scientific_name>Neopterygii</scientific_name>\n <id>41665</id>\n- <scientific_name>Neopterygii</scientific_name>\n+ <common_name>Ray-finned fishes</common_name>\n </taxonomy>\n- <clade branch_length="0.155187">\n- <confidence type="bootstrap">19</confidence>\n+ <clade branch_length="0.148891">\n+ <confidence type="bootstrap">33</confidence>\n <taxonomy>\n+ <scientific_name>Clupeocephala</scientific_name>\n <id>186625</id>\n- <scientific_name>Clupeocephala</scientific_name>\n+ <common_name>Teleost fishes</common_name>\n </taxonomy>\n- <clade branch_length="0.133192">\n- <confidence type="bootstrap">18</confidence>\n+ <clade branch_length="0.181209">\n+ <confidence type="bootstrap">46</confidence>\n <taxonomy>\n- <id>123368</id>\n- <scientific_name>Acanthomorphata</scientific_name>\n+ <scientific_name>Otophysi</scientific_name>\n+ <id>186626</id>\n+ <common_name>Teleost fishes</common_name>\n </taxonomy>\n- <clade branch_length="0.374304">\n- <name>ENSGMOG00000009699</name>\n+ <clade branch_length="0.421267">\n+ <name>ENSAMXG00000013027</name>\n <taxonomy>\n- <id>8049</id>\n- <scientific_name>Gadus morhua</scientific_name>\n+ <id>7994</id>\n+ <scientific_name>Astyanax mexicanus</scientific_name>\n+ <common_name>Cave fish</common_name>\n </taxonomy>\n <sequence>\n- <accession source="Ensembl">ENSGMOP00000010385</accession>\n+ <accession source="Ensembl">ENSAMXP00000013440</accession>\n <name>brca2-201</name>\n- <location>GeneScaffold_2233:16156-29802</location>\n- <mol_seq 
is_aligned="0">LARDLQDMRLRKKKRQTVRPLPGSLFLAKASGGARIPLRAALRQLYQHGVHQPVWTVTAENAESFRLSFRRFFRWGSSVSRGVQLADGGWLVPRDDWTLGKEEFYRALCDSPGVDVKLLSQEWAYNHYRWVVWKLASMERSFPLTMASLWLNPEQILLQLKYRYDVEVDHSRRPALRKITERDDAAAKTLVLCVCGVVPGADQQPQGSHAPPPGVVWLTDGWYAIKAQLDAPLTAMLRRGGAGGKLVVYGAELVGSQDGCSPLEAPEGLMLKIGANSCRRARWDAKLGFQRDPRPFLLRLSSLFSTGGAVGCVDLLILRSYPVLWMEKKQDGVFVFRSGRAEEREARRFDDHNNKTMEALYAKIQADIQREDKGSARERNSGEELYEAFENDPAYLEACLNDQQLEVLQSYRRSVLEKRQAGLQERCRRALEQAQESQGGCPRRDVTPVWKLCVVDARAPPGYMLNVWRPPADLQAQLKEGARYRVYNLSVTAGKKRNPGASVQLTATSKTHFQEVQVGQDWLSDHFQARQAVHFQELQRPEVQSACGEVDLVGYVVTTADTHGTSPVVYLVDGDLNLVKVRCFSSLLQWGLEELVKPATLLALSNLQLSARRATTLPVLYASDLTAFSSNPREAHLQSSHSNADRXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRALDYLSRMPSPPPLGPLGSLASPACVKKTFNPPRRSTTPAAVATTRQTPAHGPRVGPWQEEEWENDEGLAQIDTQVL</mol_seq>\n+ <location>KB882257.1:1212119-1232402</location>\n+ <mol_seq is_aligned="0">MIEDFFHQIETEELGPLSSDWFKELTAKASKDESFPGAVEHEASSRTEDGTFRAPQETPALESQMSSTPRLFRVRGPLSPDSVLGRSPNPAFQQQGMQTPLSTLPWTDSSPCLFGSAKESQRFEEDSEPLKKHDYFGLLDTPKSSLVQDSSAKRISESLGAQLHPDLSWSSNFNTPGAMSPTVILTKKDAQPSPVSFLKDKEVIIVRKLFPSLSKGTESTSEITSTAHNNASLTEENAQTKGPDESFDNVEGLWRQTVPDAINDSDVRDTVESVLDGAEDVLSIFFSNSSSALRRVKSKERTKKRVNGVSKDVKPAALATQHYTDRTGTATEAKSPPKNKDFSQWSPLSLSQVSDAKAKQDCTSDLANKHLNFESVNSDSGEDALKDSITQLGQFNTCEVEKQKSSPDSYPEKSQLRSSLLAASPALTFSRKPRKFVYQVQSPFPPTKENDLTGKHYREPFLTAENKKHIKDDPQTSDTVGDCLVHPKEPPVKQGNPGALNVDHGLDMTQLCNAFAEDFTQEIRSDAIKIDEVQTKAESVLHSDDGAEHLEFPANHKEASALAEESINLTSRSRMENTLSESLKHENGYPA'..b'IDNDKELLSHAKEESRKSNIKHSSKLINNENCGKTEHTSEHLVCQSNVSLPFIKTLGKNATILVSGNEKSQNQELQLDCLKCESTDVKHTPQKETINDNSKCNESSLSELSMKGFQTASGRNIMMSESSIQKARNIFAEEHEDSFTLRCNIQNTIQIPQPVNEPTQFPYVNLGPKPTTTSGWQEKNILRRSTEKGFMPGFCTAGGKKVSVSDNSLAKAHKLFQEECTFSKEGKLDEVKQNKLMNSEPLSLLTCESVLKQSDGFIEDISTSRNALEIRPELYPEGMCSNRASNGSGNNSEFTAGEGISININQSSLLTTGNVLKNLPSESSGHDVYSVTEHLSTVVKVKRYNDSGHFVNQNLAECNDNHVLSTQKNTANISNRNEDCTSLAPLSFSTASGKSVTVSHDSLQKARLMLSEAANDVTVDTSKQEAAYITPAIRKTEAEKEQNTVDDSDRVNANTFSFSTASGKKVNISGNSLKQVRAVCLSSDPKETSAALFNVEKSVFNEDVKDVSLLQPNVTMPKA
VSFSTASGKTVQLSDESLKKARVIFSEIDTCPLMQQQTNESTVEEIVIGGGMTKSKQMPLTTEKVETTRKNNGTFGFNTASGKQVSVSESALQKVKDIFQEFDDPDNYEQNKSLVRLPVSSKIKESTPGTKRLVQTAGSSYKNDNLQCKAGNLRTFQDKQAGKKSLTYSEAAISPIESSVPIYEMQVMLKHTNNQACKYQPRVEVPLQDQRWQNILEIELPATCAPAFRETHNILFFGDLQHSTHFDICSLYSGKNPAVKHQLASHSKMQTLVISGRDSSGTLTLQFTLRIVILHTVNNQYSLNKQLFTFSSALRQVTCIPTQAHLHSKVKIFHQSLPIKSPDVASDSTSKSYSPTAAKETINCSSASKIPAKKFVPPFKKTVATLADNQSNSVQNGSSDGLIESIVYPKEDKVETICSSKDQFDDSDILQMTSNLRCSKDLQEMRIRKKLRQKIKPHPGSLYRLKMSHVKRISLQSAVAERCPTLYSREQLYRYGIVKNHIGVSSENALSFQFHCSNYFTKELLLSGNGVQLADGGWLIPTEQGNAGKEEIYRAFCDTPGVDPKLISAEWVHNHYRWIVWKLAAMEVRFPKTFACRCLTPERVLLQLKYRYDVEIDKSQRSAIKKIMERDDSPAKTLVLCIAKIISQGTRLPNACSNKTEPADSKESSAVIEVTDSWYGIKVLLDPCLTALLHKGRLFIGQKLIVHGAELIGSDDACSPLEAPESLMLKIAANSTRPVRWHTKLGYFKDPRPFCLHLSSLLSEGGVVGCVDVVIQRIYPMQWMEKMANGLYVFRNDRAEEREAEKHSANQQKKLEMLFSKIQAEFEQREVTCNRRKGLRRRSLNAQQMQTLQDGAEIYEAIQNESDPGYLESYLSAEQLKALNHHRQLLNDKKQALIQAEFRKAIECSEQDANGCTRRDVTPVWKLRIADYRNYETDAAYILNIWRPLPDVLSLLKEGCRYKMYHLAASTSKGKSLAADLQLTATKKTRFQQLQLSESILEQIYSPREVTDFSRFQEPLFSAPYAEVDLVGLIISIYKKTGAAPVVYISDESHNIVALKFWTDLGQLGLEEITKPRTYISASNLRWRSDCIEGIPTLYVGDLANISSNPKESHLQRAIQKLKLSVQNVQDFWNSSQTALMKTLQINSTDTTECSKNPTTPTWKSDVSARSGYLTPLHHSGKRLLNSVHTSDPQTENPGCSKEIQLKTCKKRKALDFLNRIPSPPPVTPVRPFVSPSLQKAFRPPRSCSVQKLGPETKGNTENVQGTTPECTKDLAKLEGEFVADEELAMINTQALLLGLEEEKKKTEQKTSRTAGKMTAHESPIENASPVPAQEQQTEEALNIPVGNSEKSYLCLRKRKRK</mol_seq>\n- </sequence>\n- <property datatype="xsd:string" ref="Compara:genome_db_name" applies_to="clade">xenopus_tropicalis</property>\n- </clade>\n+ </clade>\n+ <clade branch_length="0.314542">\n+ <name>ENSLACG00000007788</name>\n+ <taxonomy>\n+ <id>7897</id>\n+ <scientific_name>Latimeria chalumnae</scientific_name>\n+ <common_name>Coelacanth</common_name>\n+ </taxonomy>\n+ <sequence>\n+ <accession source="Ensembl">ENSLACP00000008815</accession>\n+ <location>JH127744.1:299190-332700</location>\n+ <mol_seq 
is_aligned="0">MEWEAVESVKALMRDDELTDAGLDASKDSLNRACRRQSGGNFRARKRMRLEQVSADEPPVKRQLLAEFDRTVENGHKSLQKPLICTPNGTLKDRRKFMYSVPLKPVVCGPWSNNSKTGQQVTKPSITLPGRGVETFQPKNHIAPSPVYDPPSNRRGPVFAPPFHGATFRGLQKPSASHTSSKTAKTFVPPFKMKASASHTVHFSSKVINTCEKILENLVYLKPSLASCNIFQSLEEMTANLQCARDLQEMRLRKKQRQNIRPQPGSLYLAKTSGVARVSLKAATGNQCPSSYSTEQLYVHGVGKSTLKVRSENAESFQFSCSDYFGKDVLLAGNGLKLADGGWLIPSDKGMVGKEEFYRALCDTPGVAPKLISESWVYNHYRWIVWKLAAMEAAFPKEFGNRCLTPERVLLQLKYRYDIEVDKCRRSTVKKIMERDDTAAKTLVLCISKLISVEDRFKQTKNKNEKGAEEARKEAVAGVIETTDGWYGIKVLLDPPLTVLVQRGRLSVGCKIITHGAEIIGSQDACTPLEAPECLMLKISANSTRPACWSAKLGFHRDPRPFPLPLASLFNDGGLVGCVDVVVVRLYPIQWMEKKSDGIFVFRNDRAEEREAQRQVENQQRKMESLFAKIQTEFEQKYEAKSKRRGQKAQKFSKQEIQALQDGAELNEAIENSMDPGYFEACLREEQLKVLHGHRQMLNEKKQAEFQAEFKKALESAEQEGKSCCKRGVTPVWKLRIVDYRKPSAAEYILNIWRPLADLHSLLKEGNRYRIYQLLASQSKGRTTTADIQLTATKKTQYQQFQSFPELISELYSPRKAVKFNMLMDPTFRPAYAEVDLVGYTISIEGKPGVAPVVYLSDESHNFVAIKVWTALNQLAVEDIVKPFSLIAASNLQWRSDSRSIIPMLYAGDLSIFSSNPKEGHLQEAFNQRRTAIQENISGTYLPPEKKNLHQESYKSCQYNTLNVLMNGNIHTQSPVLSRVHMGTSCAFLFLLPSPYPESKHTSPLITMKAGVKSMTFPGSAKLMPQASENQELDTPKNRKKKAALDYLCRIPSPPALTPIRSFVSSSLQKAFHPPRSCVKLQSGENPVVPTVGNNAVLGIQSKKDEGPAAFNEEDSVADEELAMINTQAFLVGLRRDKRPSLLDKTASLKGHVPSERFLEEKLLSVLKEQASSNSERNATSLENKSCDKSRTCVKPCEHSNDSIAEETSEIIPGCHGGESAVENQSKNSSLCHKKLQQKKRRKYY</mol_seq>\n+ </sequence>\n+ <property datatype="xsd:string" ref="Compara:genome_db_name" applies_to="clade">latimeria_chalumnae</property>\n </clade>\n </clade>\n </clade>\n' |