Repository 'ensembl_get_sequences'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/ensembl_get_sequences

Changeset 0:76b2c482f1e8 (2016-08-11)
Next changeset 1:e5dd4bd78bbc (2016-12-12)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit e80af91bced56efdb4fbf62ac03232655a22f25d-dirty
added:
get_feature_info/get_feature_info.py
get_genetree/get_genetree.py
get_sequences/get_sequences.py
get_sequences/get_sequences.xml
test-data/genetree.json
test-data/genetree.phyloxml
test-data/input.txt
test-data/out.json
test-data/sequences.fasta
tool_dependencies.xml
b
diff -r 000000000000 -r 76b2c482f1e8 get_feature_info/get_feature_info.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_feature_info/get_feature_info.py Thu Aug 11 14:29:50 2016 -0400
[
@@ -0,0 +1,55 @@
+# A simple tool to connect to the Ensembl server and retrieve feature
+# information using the Ensembl REST API.
+#
+# Reads Ensembl IDs (one per line) from the file given with -i, sends them in
+# a single POST request to the lookup/id endpoint and prints the JSON response
+# to standard output.
+from __future__ import print_function
+
+import json
+import optparse
+
+import requests
+
+try:
+    from urllib.parse import urljoin  # Python 3
+except ImportError:
+    from urlparse import urljoin  # Python 2
+
+parser = optparse.OptionParser()
+parser.add_option('-i', '--input', help='List of Ensembl IDs')
+parser.add_option('-e', '--expand', type='choice', choices=['0', '1'],
+                  default='0',
+                  help='Expands the search to include any connected features. e.g. If the object is a gene, its transcripts, translations and exons will be returned as well.')
+
+parser.add_option('-s', '--species', type='choice',
+                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
+                  help='Specify the genome databases for vertebrates and other eukaryotic species')
+
+parser.add_option('-f', '--format', type='choice',
+                  choices=['full', 'condensed'], default='full',
+                  help='Specify the formats to emit from this endpoint')
+options, args = parser.parse_args()
+if options.input is None:
+    raise Exception('-i option must be specified')
+
+
+server = 'http://rest.%s.org' % options.species
+ext = 'lookup/id'
+
+headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
+# Forward the command-line choices unchanged as query-string parameters.
+params = dict((k, getattr(options, k)) for k in ['format', 'expand'])
+with open(options.input) as f:
+    # Skip blank lines (e.g. a trailing empty line) so that no empty ID is
+    # sent to the server.
+    ids = [line.strip() for line in f if line.strip()]
+data = {'ids': ids}
+r = requests.post(urljoin(server, ext), params=params, headers=headers,
+                  data=json.dumps(data))
+
+# raise_for_status() does nothing for a successful response, so no separate
+# check of r.ok is needed.
+r.raise_for_status()
+
+print(r.text)
b
diff -r 000000000000 -r 76b2c482f1e8 get_genetree/get_genetree.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_genetree/get_genetree.py Thu Aug 11 14:29:50 2016 -0400
[
@@ -0,0 +1,68 @@
+# A simple tool to connect to the Ensembl server and retrieve genetree using
+# the Ensembl REST API.
+#
+# Fetches the gene tree for a single Ensembl gene ID or gene tree ID (-i) with
+# a GET request and prints the response body to standard output.
+from __future__ import print_function
+
+import optparse
+
+import requests
+
+try:
+    from urllib.parse import urljoin  # Python 3
+except ImportError:
+    from urlparse import urljoin  # Python 2
+
+parser = optparse.OptionParser()
+parser.add_option('--id_type', type='choice', default='gene_id',
+                  choices=['gene_id', 'gene_tree_id'], help='Input type')
+parser.add_option('-i', '--input', help='Ensembl ID')
+parser.add_option('--format', type='choice',
+                  choices=['json', 'orthoxml', 'phyloxml', 'nh'],
+                  default='json', help='Output format')
+parser.add_option('-s', '--sequence', type='choice',
+                  choices=['protein', 'cdna', 'none'], default='protein',
+                  help='The type of sequence to bring back. Setting it to none results in no sequence being returned')
+
+parser.add_option('-g', '--species', type='choice',
+                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
+                  help='Specify the genome databases for vertebrates and other eukaryotic species')
+
+parser.add_option('-a', '--aligned', type='choice', choices=['0', '1'],
+                  default='0', help='Return the aligned string if true. Otherwise, return the original sequence (no insertions)')
+parser.add_option('-c', '--cigar_line', type='choice', choices=['0', '1'],
+                  default='0',
+                  help='Return the aligned sequence encoded in CIGAR format')
+parser.add_option('--nh_format', type='choice',
+                  choices=['full', 'display_label_composite', 'simple', 'species', 'species_short_name', 'ncbi_taxon', 'ncbi_name', 'njtree', 'phylip'],
+                  default='simple',
+                  help='The format of a NH (New Hampshire) request')
+options, args = parser.parse_args()
+if options.input is None:
+    raise Exception('-i option must be specified')
+
+server = 'http://rest.%s.org' % options.species
+
+# REST endpoint for each accepted --id_type value.
+endpoints = {
+    'gene_id': 'genetree/member/id',
+    'gene_tree_id': 'genetree/id',
+}
+ext = endpoints[options.id_type]
+
+# Value of the Content-Type header sent for each accepted --format value.
+content_types = {
+    'json': 'application/json',
+    'orthoxml': 'text/x-orthoxml+xml',
+    'phyloxml': 'text/x-phyloxml+xml',
+    'nh': 'text/x-nh',
+}
+headers = {'Content-Type': content_types[options.format]}
+params = dict((k, getattr(options, k)) for k in ['sequence', 'aligned', 'cigar_line', 'nh_format'])
+r = requests.get(urljoin(server, '/'.join([ext, options.input])), params=params, headers=headers)
+
+# raise_for_status() does nothing for a successful response.
+r.raise_for_status()
+
+print(r.text)
b
diff -r 000000000000 -r 76b2c482f1e8 get_sequences/get_sequences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_sequences/get_sequences.py Thu Aug 11 14:29:50 2016 -0400
[
@@ -0,0 +1,60 @@
+# A simple tool to connect to the Ensembl server and retrieve sequences using
+# the Ensembl REST API.
+#
+# Reads Ensembl IDs (one per line) from the file given with -i, requests their
+# sequences in FASTA format in batches and prints each response to standard
+# output.
+from __future__ import print_function
+
+import json
+import optparse
+from itertools import islice
+
+import requests
+
+try:
+    from urllib.parse import urljoin  # Python 3
+except ImportError:
+    from urlparse import urljoin  # Python 2
+
+parser = optparse.OptionParser()
+parser.add_option('-i', '--input', help='List of Ensembl IDs')
+
+parser.add_option('-s', '--species', type='choice',
+                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
+                  help='Specify the genome databases for vertebrates and other eukaryotic species')
+
+parser.add_option('-t', '--type', type='choice',
+                  choices=['genomic', 'cds', 'cdna', 'protein'],
+                  default='genomic', help='Type of sequence')
+parser.add_option('--expand_3prime', type='int', default=0,
+                  help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
+parser.add_option('--expand_5prime', type='int', default=0,
+                  help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
+options, args = parser.parse_args()
+if options.input is None:
+    raise Exception('-i option must be specified')
+
+server = 'http://rest.%s.org' % options.species
+ext = 'sequence/id'
+
+headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
+params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime'])
+with open(options.input) as f:
+    # Drop blank lines before batching (rather than inside each batch), so
+    # that no empty ID is sent and every batch except the last holds 50 IDs.
+    ids_iter = (i for i in (line.strip() for line in f) if i)
+    # Need to split the IDs in chunks of 50 because of the limit imposed by Ensembl
+    while True:
+        ids = list(islice(ids_iter, 50))
+        if not ids:
+            break
+        data = {'ids': ids}
+        r = requests.post(urljoin(server, ext), params=params, headers=headers,
+                          data=json.dumps(data))
+
+        # raise_for_status() does nothing for a successful response.
+        r.raise_for_status()
+
+        # One response per batch; print() ends each with a newline.
+        print(r.text)
b
diff -r 000000000000 -r 76b2c482f1e8 get_sequences/get_sequences.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_sequences/get_sequences.xml Thu Aug 11 14:29:50 2016 -0400
[
@@ -0,0 +1,63 @@
+<tool id="get_sequences" name="Get sequences by Ensembl ID" version="0.1.1">
+    <description>using REST API</description>
+    <!-- Runs get_sequences.py with the selected options; the command below
+         redirects the script's standard output into the output dataset. -->
+    <requirements>
+        <requirement type="package" version="2.7">requests</requirement>
+    </requirements>
+    <command>
+<![CDATA[
+python '$__tool_directory__/get_sequences.py'
+-s $species_selector
+--expand_3prime $expand_3prime
+--expand_5prime $expand_5prime
+-t $type_selector
+-i "$input"
+> "$output"
+]]>
+    </command>
+
+    <inputs>
+        <param name="input" type="data" format="txt" label="List of Ensembl IDs" />
+        <param name="species_selector" type="select" label="Select Species">
+            <option value="ensembl" selected="true">Vertebrates</option>
+            <option value="ensemblgenomes">Other species</option>
+        </param>
+        <param name="expand_3prime" type="integer" value="0" min="0" label="expand_3prime" help="Expand each sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type." />
+        <param name="expand_5prime" type="integer" value="0" min="0" label="expand_5prime" help="Expand each sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type." />
+        <param name="type_selector" type="select" label="Type" help="Type of sequence. Defaults to genomic where applicable, i.e. not translations. cDNA refers to the spliced transcript sequence with UTR; CDS refers to the spliced transcript sequence without UTR">
+            <option value="genomic" selected="true">Genomic</option>
+            <option value="cds">CDS</option>
+            <option value="cdna">cDNA</option>
+            <option value="protein">Protein</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="fasta" label="$(tool.name) on ${on_string}" />
+    </outputs>
+
+    <tests>
+          <test>
+                <param name="input" ftype="txt" value="input.txt" />
+                <param name="expand_3prime" value="0" />
+                <param name="expand_5prime" value="0" />
+                <param name="type_selector" value="genomic" />
+                <output name="output" file="sequences.fasta" />
+          </test>
+     </tests>
+
+    <help>
+<![CDATA[
+**What it does**
+
+Retrieves FASTA sequences from Ensembl using its REST API.
+
+Uses the `"POST sequence/id"`_ API endpoint.
+
+.. _"POST sequence/id": http://rest.ensembl.org/documentation/info/sequence_id_post
+]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 76b2c482f1e8 test-data/genetree.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genetree.json Thu Aug 11 14:29:50 2016 -0400
[
b'@@ -0,0 +1,1 @@\n+{"tree":{"events":{"type":"speciation"},"branch_length":0,"children":[{"events":{"type":"speciation"},"branch_length":0.153275,"children":[{"events":{"type":"speciation"},"branch_length":0.155187,"children":[{"events":{"type":"speciation"},"branch_length":0.133192,"children":[{"events":{"type":"speciation"},"branch_length":0.201095,"children":[{"events":{"type":"speciation"},"branch_length":0.015782,"children":[{"events":{"type":"speciation"},"branch_length":0.217419,"children":[{"sequence":{"mol_seq":{"seq":"QLARDMQDMRIRKKKRQTIRPLPGSLFQKKSSGVARIPFKAAVNGKPPARYTAKPLCGLGVPLNVLEITSETAESFRFSLQHFVKLESLIDKGGIQLADGGWLIPTNDGTAGKEEFYRALCDTPGVDPKLMSEEWVYNHYRWIVWKQASMERSFPEEMGSLCLTPEQVLLQLKYRYDIEVDHSRRPALRKIMEKDDTAAKTLVLCVCGVVFRGSSPKNKSFGDISTPGADPKVENPCAVVWLTDGWYSIKAQLDGPLTSMLHRGRLPVGGKLIIHGAQLVGSENACSPLEAPVSLMLKICANSSRPARWDSKLGFHRDPRPFLLPVSSLYSSGGPVGCVDIIILRSYPILWMERKPEGGTVFRSGRAEEKEARRYNIHKEKAMEILFDKIKAEFEKEEKGNRKPQCRRTINGQNITSLQDGEELYEAVGDDPAFLEAHLTEKQVEVLQNYKRLVMEKQQAELQDRYRRAVESAEDGVGGCPKRDVAPVWRLCIADSMGHSGRVYQLSLWRPPSELQALLKEGCRYKVYNLTTLDSKKQGGNATVQLTATKKTQFEHLQGSEEWLSKHFQPRVATNFVRLQDPEFNPLCSEVDLTGYVITIIDGQGFSPAFYLADGKQNFVKVRCFSSFAQSGLEDVIKPRVLLALSNLQLRGQSTSPTPVVYAGDLTVFSTNPKEVHLQESFSQLKTLVQGQENFFVHAEEKLSQLMSDGLSAIASPAGQIQTPASTVKRRGDMTDVSSNIMVINKTSKVTCQQPGRSHRFSTPINRNSTAHSSAERNPSTIKKRKALDYLSHIPSPPPLSCLSTLSSPSVKKIFIPPRRTEIPGTLKTVKTPNQKPSNTPVDDQWVNDEELAMIDTQAL","is_aligned":0},"location":"scaffold_19:196046-199577","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSTRUP00000015030"}]},"branch_length":0.072273,"id":{"source":"EnsEMBL","accession":"ENSTRUG00000006177"},"confidence":{},"taxonomy":{"scientific_name":"Takifugu 
rubripes","id":31033}},{"sequence":{"mol_seq":{"seq":"VSFSSDTPRKPKAGSLSSEFTDRFLAQEALDCTKALLEDERLVDDPHMTGECLHRCPQFSLLVNLFVKPHTAVLIPEQPPLKRRLLEEFDRTDGSSRGSALNPEKCSPNGIMGDRRVFKCSVSFQPNITTPHRICSQKAERPVSFLSRRSGTNYVETSLPNTTPTKVSALRDSNEARLQKSNFIPPFIKNVKLDTPNSKTASTFVPPFKKSRNSSKTEEEEPKHHFIPPFTNPCATSSTKKHTAGHLHNVELARDMQGMRIRKKKRQTILPLPGSLFLKKSSGVTRIPLKSAVNGKPPARYTPKQLYGLGVPLNVLEITSETAGSFRFSLQQFVKLESLTDKGGIQLADGGWLIPRNDGTAGKEEFYRALCDTTGVDPKLISEEWVYNHYRWIVWKQASMERSFPEQLGSLCLTPEQVLLQLKYRYDIEVDQSRRPALRKIMERDDTAAKTLILCVCGVVSRGSSPQKQGLGGVAAPSSDPQVENPFAVVWLTDGWYSIKAQLDGPLTSMLNRGRLPVGGKLIIHGAQLVGSQDACSPLEAPESIMLKIFANSSRRARWDAKLGFYRDPRPFLLPVSSLYNSGGPVGCVDIIILRSYPTLWMERKPEGGTVFRSGRAEEKEARRYNVHKEKAMEILFDKIQAEFEKEERDNRKPRSRRRTIGDQDIKSLQDGEELYEAVGDDPAYLEAHLTEQQAETLQNYKRLLIEKKQAELQDRYRRAVETAEDGTGSCPKRDVAPVWRLSIADFMEKPGSVYQLNIWRPPSELQSLLKEGCRYKVYNLTTTDSKKQGGNTTVQLSGTKKTQFEDLQASEELLSTYFQPRVSATFIDLQDPEFHSLCGEVDLTGYVISIIDGQGFSPAFYLTDGKQNFVKVRCFSSFAQSGLEDVIKPSVLLALSNLQLRGQATSPTPVLYAGDLTVFSTNPKEVHLQESFSQLKTLVQ","is_aligned":0},"location":"16:4700614-4705074","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSTNIP00000002435"}]},"branch_length":0.113355,"id":{"source":"EnsEMBL","accession":"ENSTNIG00000016261"},"confidence":{},"taxonomy":{"scientific_name":"Tetraodon 
nigroviridis","id":99883}}],"confidence":{"bootstrap":100},"taxonomy":{"scientific_name":"Tetraodontidae","id":31031}},{"sequence":{"mol_seq":{"seq":"LPNVELAQDMQDMRIRKKKRQTIRPLPGSLFLTKTSGVTRIPLKAALVFLLQLYRHGVHQHVCEISSETAESFRFNLKQFIKREALLDGGGVQLADGGWLIPSKDGTAGKEEFYRALCDTPGVDPKLISDGWVDNHYRWVVWKQASMERSFPETMGGLCLTPEQVLLQLKYRYDVEVDHSRRPALRRITERDDTAAKTLVLCVCGVVSRSFDDSKTPRGADAGGGNPSAVVWLTDGWYAIRAQLDEPLTAMLRNGRVAVGSKLIVHGAQLVGSQEACSPLEAPEALMLKICANSSRPVRWDAKLGFHKDPRPFLLPLSCLYSSGGQVGCVDMIVLRSYPIQWMERKPEGGVVFRSVRAEEKEAKRFNGLKQKAMEILFAKIQDEFEKEDKGRTCDFTTQAISRQAIAGLQAGEELCEAVGEDPAHLEALLSEQQVETLNTYRRCVMEKKQAQLHDRFQRALESAEASEGSCPKREVTPVWRLGVADSRDQRGRVYQLNLWRPSSDLQALLKEGRRYKVYNLTTSDGKKHNGSSNVQLTGTKKTQFQDLQASREWLSTRFQPRVSACFVDLQNPEFQSLCGEVDLTGFVIQIVDGQGFSPAFYLADGELNFVKVRCFSSFAQSGLEDLVKPRVLLSLSNLQLRGQSASPTPVVYAGDLTVFSANPKDAHLQESLSQNKNLRQSQENFFLIAEETLSRLVQSDGRRPLSSPALHTRTPALATSMIQDTTASVKCVLLMQGASQQLVRSRGTFTPVSRKPPAANCSTEKDAGSVKRRRALNYLSHIPSPPPLLNLGSVASPCVNKTFNPPRRSGTPSTLKTVQTPAHKAQKVDSLVEDEWVNDEELAMIDTQAL","is_aligned":0},"location":"groupI:13362884-13366744","name'..b'LQLDCLKCESTDVKHTPQKETINDNSKCNESSLSELSMKGFQTASGRNIMMSESSIQKARNIFAEEHEDSFTLRCNIQNTIQIPQPVNEPTQFPYVNLGPKPTTTSGWQEKNILRRSTEKGFMPGFCTAGGKKVSVSDNSLAKAHKLFQEECTFSKEGKLDEVKQNKLMNSEPLSLLTCESVLKQSDGFIEDISTSRNALEIRPELYPEGMCSNRASNGSGNNSEFTAGEGISININQSSLLTTGNVLKNLPSESSGHDVYSVTEHLSTVVKVKRYNDSGHFVNQNLAECNDNHVLSTQKNTANISNRNEDCTSLAPLSFSTASGKSVTVSHDSLQKARLMLSEAANDVTVDTSKQEAAYITPAIRKTEAEKEQNTVDDSDRVNANTFSFSTASGKKVNISGNSLKQVRAVCLSSDPKETSAALFNVEKSVFNEDVKDVSLLQPNVTMPKAVSFSTASGKTVQLSDESLKKARVIFSEIDTCPLMQQQTNESTVEEIVIGGGMTKSKQMPLTTEKVETTRKNNGTFGFNTASGKQVSVSESALQKVKDIFQEFDDPDNYEQNKSLVRLPVSSKIKESTPGTKRLVQTAGSSYKNDNLQCKAGNLRTFQDKQAGKKSLTYSEAAISPIESSVPIYEMQVMLKHTNNQACKYQPRVEVPLQDQRWQNILEIELPATCAPAFRETHNILFFGDLQHSTHFDICSLYSGKNPAVKHQLASHSKMQTLVISGRDSSGTLTLQFTLRIVILHTVNNQYSLNKQLFTFSSALRQVTCIPTQAHLHSKVKIFHQSLPIKSPDVASDSTSKSYSPTAAKETINCSSASKIPAKKFVPPFKKTVATLADNQSNSVQNGSSDGLIESIVYPKEDKVETICSSKDQFDDSDILQMTSNLRCSKDLQEMRIRKKLRQKIKPHPGSLYRLKMSHVKRISLQSAVAER
CPTLYSREQLYRYGIVKNHIGVSSENALSFQFHCSNYFTKELLLSGNGVQLADGGWLIPTEQGNAGKEEIYRAFCDTPGVDPKLISAEWVHNHYRWIVWKLAAMEVRFPKTFACRCLTPERVLLQLKYRYDVEIDKSQRSAIKKIMERDDSPAKTLVLCIAKIISQGTRLPNACSNKTEPADSKESSAVIEVTDSWYGIKVLLDPCLTALLHKGRLFIGQKLIVHGAELIGSDDACSPLEAPESLMLKIAANSTRPVRWHTKLGYFKDPRPFCLHLSSLLSEGGVVGCVDVVIQRIYPMQWMEKMANGLYVFRNDRAEEREAEKHSANQQKKLEMLFSKIQAEFEQREVTCNRRKGLRRRSLNAQQMQTLQDGAEIYEAIQNESDPGYLESYLSAEQLKALNHHRQLLNDKKQALIQAEFRKAIECSEQDANGCTRRDVTPVWKLRIADYRNYETDAAYILNIWRPLPDVLSLLKEGCRYKMYHLAASTSKGKSLAADLQLTATKKTRFQQLQLSESILEQIYSPREVTDFSRFQEPLFSAPYAEVDLVGLIISIYKKTGAAPVVYISDESHNIVALKFWTDLGQLGLEEITKPRTYISASNLRWRSDCIEGIPTLYVGDLANISSNPKESHLQRAIQKLKLSVQNVQDFWNSSQTALMKTLQINSTDTTECSKNPTTPTWKSDVSARSGYLTPLHHSGKRLLNSVHTSDPQTENPGCSKEIQLKTCKKRKALDFLNRIPSPPPVTPVRPFVSPSLQKAFRPPRSCSVQKLGPETKGNTENVQGTTPECTKDLAKLEGEFVADEELAMINTQALLLGLEEEKKKTEQKTSRTAGKMTAHESPIENASPVPAQEQQTEEALNIPVGNSEKSYLCLRKRKRK","is_aligned":0},"location":"GL172716.1:1071058-1096238","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSXETP00000060681"}]},"branch_length":0.774548,"id":{"source":"EnsEMBL","accession":"ENSXETG00000017011"},"confidence":{},"taxonomy":{"scientific_name":"Xenopus 
tropicalis","id":8364}}],"confidence":{"bootstrap":1},"taxonomy":{"scientific_name":"Tetrapoda","id":32523}},{"sequence":{"mol_seq":{"seq":"MEWEAVESVKALMRDDELTDAGLDASKDSLNRACRRQSGGNFRARKRMRLEQVSADEPPVKRQLLAEFDRTVENGHKSLQKPLICTPNGTLKDRRKFMYSVPLKPVVCGPWSNNSKTGQQVTKPSITLPGRGVETFQPKNHIAPSPVYDPPSNRRGPVFAPPFHGATFRGLQKPSASHTSSKTAKTFVPPFKMKASASHTVHFSSKVINTCEKILENLVYLKPSLASCNIFQSLEEMTANLQCARDLQEMRLRKKQRQNIRPQPGSLYLAKTSGVARVSLKAATGNQCPSSYSTEQLYVHGVGKSTLKVRSENAESFQFSCSDYFGKDVLLAGNGLKLADGGWLIPSDKGMVGKEEFYRALCDTPGVAPKLISESWVYNHYRWIVWKLAAMEAAFPKEFGNRCLTPERVLLQLKYRYDIEVDKCRRSTVKKIMERDDTAAKTLVLCISKLISVEDRFKQTKNKNEKGAEEARKEAVAGVIETTDGWYGIKVLLDPPLTVLVQRGRLSVGCKIITHGAEIIGSQDACTPLEAPECLMLKISANSTRPACWSAKLGFHRDPRPFPLPLASLFNDGGLVGCVDVVVVRLYPIQWMEKKSDGIFVFRNDRAEEREAQRQVENQQRKMESLFAKIQTEFEQKYEAKSKRRGQKAQKFSKQEIQALQDGAELNEAIENSMDPGYFEACLREEQLKVLHGHRQMLNEKKQAEFQAEFKKALESAEQEGKSCCKRGVTPVWKLRIVDYRKPSAAEYILNIWRPLADLHSLLKEGNRYRIYQLLASQSKGRTTTADIQLTATKKTQYQQFQSFPELISELYSPRKAVKFNMLMDPTFRPAYAEVDLVGYTISIEGKPGVAPVVYLSDESHNFVAIKVWTALNQLAVEDIVKPFSLIAASNLQWRSDSRSIIPMLYAGDLSIFSSNPKEGHLQEAFNQRRTAIQENISGTYLPPEKKNLHQESYKSCQYNTLNVLMNGNIHTQSPVLSRVHMGTSCAFLFLLPSPYPESKHTSPLITMKAGVKSMTFPGSAKLMPQASENQELDTPKNRKKKAALDYLCRIPSPPALTPIRSFVSSSLQKAFHPPRSCVKLQSGENPVVPTVGNNAVLGIQSKKDEGPAAFNEEDSVADEELAMINTQAFLVGLRRDKRPSLLDKTASLKGHVPSERFLEEKLLSVLKEQASSNSERNATSLENKSCDKSRTCVKPCEHSNDSIAEETSEIIPGCHGGESAVENQSKNSSLCHKKLQQKKRRKYY","is_aligned":0},"location":"JH127744.1:299190-332700","name":"BRCA2-201","id":[{"source":"EnsEMBL","accession":"ENSLACP00000008815"}]},"branch_length":0.318609,"id":{"source":"EnsEMBL","accession":"ENSLACG00000007788"},"confidence":{},"taxonomy":{"scientific_name":"Latimeria chalumnae","id":7897}}],"confidence":{"bootstrap":1},"taxonomy":{"scientific_name":"Sarcopterygii","id":8287}}],"confidence":{},"taxonomy":{"scientific_name":"Euteleostomi","id":117571}},"rooted":1,"id":"ENSGT00390000003602","type":"gene tree"}\n'
b
diff -r 000000000000 -r 76b2c482f1e8 test-data/genetree.phyloxml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genetree.phyloxml Thu Aug 11 14:29:50 2016 -0400
b
b'@@ -0,0 +1,1347 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+\n+<phyloxml xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org">\n+  <phylogeny rooted="true" type="gene tree">\n+    <clade branch_length="0">\n+      <taxonomy>\n+        <id>117571</id>\n+        <scientific_name>Euteleostomi</scientific_name>\n+      </taxonomy>\n+      <clade branch_length="0.153275">\n+        <confidence type="bootstrap">95</confidence>\n+        <taxonomy>\n+          <id>41665</id>\n+          <scientific_name>Neopterygii</scientific_name>\n+        </taxonomy>\n+        <clade branch_length="0.155187">\n+          <confidence type="bootstrap">19</confidence>\n+          <taxonomy>\n+            <id>186625</id>\n+            <scientific_name>Clupeocephala</scientific_name>\n+          </taxonomy>\n+          <clade branch_length="0.133192">\n+            <confidence type="bootstrap">18</confidence>\n+            <taxonomy>\n+              <id>123368</id>\n+              <scientific_name>Acanthomorphata</scientific_name>\n+            </taxonomy>\n+            <clade branch_length="0.374304">\n+              <name>ENSGMOG00000009699</name>\n+              <taxonomy>\n+                <id>8049</id>\n+                <scientific_name>Gadus morhua</scientific_name>\n+              </taxonomy>\n+              <sequence>\n+                <accession source="Ensembl">ENSGMOP00000010385</accession>\n+                <name>brca2-201</name>\n+                <location>GeneScaffold_2233:16156-29802</location>\n+                <mol_seq 
is_aligned="0">LARDLQDMRLRKKKRQTVRPLPGSLFLAKASGGARIPLRAALRQLYQHGVHQPVWTVTAENAESFRLSFRRFFRWGSSVSRGVQLADGGWLVPRDDWTLGKEEFYRALCDSPGVDVKLLSQEWAYNHYRWVVWKLASMERSFPLTMASLWLNPEQILLQLKYRYDVEVDHSRRPALRKITERDDAAAKTLVLCVCGVVPGADQQPQGSHAPPPGVVWLTDGWYAIKAQLDAPLTAMLRRGGAGGKLVVYGAELVGSQDGCSPLEAPEGLMLKIGANSCRRARWDAKLGFQRDPRPFLLRLSSLFSTGGAVGCVDLLILRSYPVLWMEKKQDGVFVFRSGRAEEREARRFDDHNNKTMEALYAKIQADIQREDKGSARERNSGEELYEAFENDPAYLEACLNDQQLEVLQSYRRSVLEKRQAGLQERCRRALEQAQESQGGCPRRDVTPVWKLCVVDARAPPGYMLNVWRPPADLQAQLKEGARYRVYNLSVTAGKKRNPGASVQLTATSKTHFQEVQVGQDWLSDHFQARQAVHFQELQRPEVQSACGEVDLVGYVVTTADTHGTSPVVYLVDGDLNLVKVRCFSSLLQWGLEELVKPATLLALSNLQLSARRATTLPVLYASDLTAFSSNPREAHLQSSHSNADRXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRALDYLSRMPSPPPLGPLGSLASPACVKKTFNPPRRSTTPAAVATTRQTPAHGPRVGPWQEEEWENDEGLAQIDTQVL</mol_seq>\n+              </sequence>\n+              <property datatype="xsd:string" ref="Compara:genome_db_name" applies_to="clade">gadus_morhua</property>\n+            </clade>\n+            <clade branch_length="0.201095">\n+              <taxonomy>\n+                <id>1489872</id>\n+                <scientific_name>Percomorphaceae</scientific_name>\n+              </taxonomy>\n+              <clade branch_length="0.015782">\n+                <confidence type="bootstrap">38</confidence>\n+                <taxonomy>\n+                  <id>1489922</id>\n+                  <scientific_name>Eupercaria</scientific_name>\n+                </taxonomy>\n+                <clade branch_length="0.159219">\n+                  <name>ENSGACG00000011490</name>\n+                  <taxonomy>\n+                    <id>69293</id>\n+                    <scientific_name>Gasterosteus aculeatus</scientific_name>\n+                  </taxonomy>\n+                  <sequence>\n+                    <accession source="Ensembl">ENSGACP00000015199</accession>\n+                    <name>brca2-201</name>\n+                    <location>groupI:13362884-13366744</location>\n+                    <mol_seq 
is_aligned="0">LPNVELAQDMQDMRIRKKKRQTIRPLPGSLFLTKTSGVTRIPLKAALVFLLQLYRHGVHQHVCEISSETAESFRFNLKQFIKREALLDGGGVQLADGGWLIPSKDGTAGKEEFYRALCDTPGVDPKLISDGWVDNHYRWVVWKQASMERSFPETMGGLCLTPEQVLLQLKYRYDVEVDHSRRPALRRITERDDTAAKTLVLCVCGVVSRSFDDSKTPRGADAGGGNPSAVVWLTDGWYAIRAQLDEPLTAMLRNGRVAVGSKLIVHGAQLVGSQEACSPLEAPEALMLKICANSSRPVRWDAKLGFHKDPRPFLLPLSCLYSSGGQVGCVDMIVLRSYPIQWMERKPEGGVVFRSVRAEEKEAKRFNGLKQKAMEILFAKIQDEFEKEDKGRTCDFT'..b'name>ENSXETG00000017011</name>\n+            <taxonomy>\n+              <id>8364</id>\n+              <scientific_name>Xenopus tropicalis</scientific_name>\n+            </taxonomy>\n+            <sequence>\n+              <accession source="Ensembl">ENSXETP00000060681</accession>\n+              <name>brca2-201</name>\n+              <location>GL172716.1:1071058-1096238</location>\n+              <mol_seq is_aligned="0">MAAPQLGKSVFYDLFSTHCSHSDLGPISLNWFEELTAEALPYKSRTCEDHECSLDDLHENNIKTPKLKSSIYSQLDSTPVIFKERLLSPLFASSLTELDKRQNATDNGNNVRLEKSNCTQMQHQASEVFSPSSRCLNESPAVIKEIFKTPLRNKYLHKTPQCDWKPDICSSLFCTPKLMKNQTGCIKESLGAEVDPEMSWSSSLATPPSPTVIIAHANDQPSGNKHAAIVQSLFPNCENMEANNLPPPETGTQNEDRQICAGLEKPGSCSASKTVPEASAISSRAAWKKTVANTVKDEEVSRTVENALEGMEDVLSIFFASEKTPGLRKLKNSTQARRKVESIKSQKCQVSLFKAEEGESILVPLDEHHKCDHEMLMKPEQDYKMNAVNQKIAKETTPYEWSQLNICELDITQSQDSTLHAANLCCEDITVNKSDTCNITEAEKQEEVNEPSTDNVLSNKVGKESTLNVNCINADLNNMASSSNNSVHSLNLSQCEKMDSSEISNSEVGCITKLTTHPAKITTGELSSVDDCAKKPQERVTISTSFSTLKKQSKFMYSVNTVLTGHIASNTSIATRRSLNSHLSSDEPKPHIKENQNEPESNAKYYSNGLQKPVFLEKENDNKGISLCKTISDGQNTSEGARAPDYFEERVPETKDRCKATLSIREKVVATANCFARKQLETDYPEDASAQHEALQFHAKQYVSCSMLNSNVSNDMQIKPQVLTQACDFRSSAENLVEKTKNQLRRNEDQSFSSTGIGKKPASGEKLVGDCNEGSFLHVKQEISAAQALVGNGYETRDTPDVTSDHTRSVISNHGKSLWEEETALTELPVPEILSKTFKGFKTASNKKIHISDKNIVKGCDLFKEIEFGIGDAKPCNEENKEPLKKTPFDVPGYETNLKGFKTASNKEINVSENKFAKGRLLFKDIEEESGQTTAIMAKDNELFIKPASSNKPDIFKSTCIKGMNPSENLDSMPPKKGDVQQSAGLKHSDQIDVFGFDDPPAKGNVQTLGYFNMSASETDFKGFKTASNKDIIISESTLAKGKLIFEDIEDTRYSETGRTIDCKAKGGALSTSNLVMHNTMCKNEPSTSENTNDKAPKRAKVQTPMAENSHLKEPSSDKNAMDAANHQMTLKASAYSPKAASSALPRYKKKPISTTKT
SFQLNEHLTESQQAEISELSSILENADSQFDFTQFRKVPSVTEKQNSTEGGSESQNLNNSDVWKDVDFNDSFAAGRDHSEGMEAIPSSPGIKELPSAGECCSKETHDSMPDFVTLAPKQGFLVKQNERLFAGFNLASGKQVNIDNDVLKKAAELFNDIDNDKELLSHAKEESRKSNIKHSSKLINNENCGKTEHTSEHLVCQSNVSLPFIKTLGKNATILVSGNEKSQNQELQLDCLKCESTDVKHTPQKETINDNSKCNESSLSELSMKGFQTASGRNIMMSESSIQKARNIFAEEHEDSFTLRCNIQNTIQIPQPVNEPTQFPYVNLGPKPTTTSGWQEKNILRRSTEKGFMPGFCTAGGKKVSVSDNSLAKAHKLFQEECTFSKEGKLDEVKQNKLMNSEPLSLLTCESVLKQSDGFIEDISTSRNALEIRPELYPEGMCSNRASNGSGNNSEFTAGEGISININQSSLLTTGNVLKNLPSESSGHDVYSVTEHLSTVVKVKRYNDSGHFVNQNLAECNDNHVLSTQKNTANISNRNEDCTSLAPLSFSTASGKSVTVSHDSLQKARLMLSEAANDVTVDTSKQEAAYITPAIRKTEAEKEQNTVDDSDRVNANTFSFSTASGKKVNISGNSLKQVRAVCLSSDPKETSAALFNVEKSVFNEDVKDVSLLQPNVTMPKAVSFSTASGKTVQLSDESLKKARVIFSEIDTCPLMQQQTNESTVEEIVIGGGMTKSKQMPLTTEKVETTRKNNGTFGFNTASGKQVSVSESALQKVKDIFQEFDDPDNYEQNKSLVRLPVSSKIKESTPGTKRLVQTAGSSYKNDNLQCKAGNLRTFQDKQAGKKSLTYSEAAISPIESSVPIYEMQVMLKHTNNQACKYQPRVEVPLQDQRWQNILEIELPATCAPAFRETHNILFFGDLQHSTHFDICSLYSGKNPAVKHQLASHSKMQTLVISGRDSSGTLTLQFTLRIVILHTVNNQYSLNKQLFTFSSALRQVTCIPTQAHLHSKVKIFHQSLPIKSPDVASDSTSKSYSPTAAKETINCSSASKIPAKKFVPPFKKTVATLADNQSNSVQNGSSDGLIESIVYPKEDKVETICSSKDQFDDSDILQMTSNLRCSKDLQEMRIRKKLRQKIKPHPGSLYRLKMSHVKRISLQSAVAERCPTLYSREQLYRYGIVKNHIGVSSENALSFQFHCSNYFTKELLLSGNGVQLADGGWLIPTEQGNAGKEEIYRAFCDTPGVDPKLISAEWVHNHYRWIVWKLAAMEVRFPKTFACRCLTPERVLLQLKYRYDVEIDKSQRSAIKKIMERDDSPAKTLVLCIAKIISQGTRLPNACSNKTEPADSKESSAVIEVTDSWYGIKVLLDPCLTALLHKGRLFIGQKLIVHGAELIGSDDACSPLEAPESLMLKIAANSTRPVRWHTKLGYFKDPRPFCLHLSSLLSEGGVVGCVDVVIQRIYPMQWMEKMANGLYVFRNDRAEEREAEKHSANQQKKLEMLFSKIQAEFEQREVTCNRRKGLRRRSLNAQQMQTLQDGAEIYEAIQNESDPGYLESYLSAEQLKALNHHRQLLNDKKQALIQAEFRKAIECSEQDANGCTRRDVTPVWKLRIADYRNYETDAAYILNIWRPLPDVLSLLKEGCRYKMYHLAASTSKGKSLAADLQLTATKKTRFQQLQLSESILEQIYSPREVTDFSRFQEPLFSAPYAEVDLVGLIISIYKKTGAAPVVYISDESHNIVALKFWTDLGQLGLEEITKPRTYISASNLRWRSDCIEGIPTLYVGDLANISSNPKESHLQRAIQKLKLSVQNVQDFWNSSQTALMKTLQINSTDTTECSKNPTTPTWKSDVSARSGYLTPLHHSGKRLLNSVHTSDPQTENPGCSKEIQLKTCKKRKALDFLNRIPSPPPVTPVRPFVSPSLQKAFRPPRSCSVQKLGPETKGNTENVQGTTPECTKDLAKLEGEFVADEELAMINTQALLLGLEEEKKKTEQKTSRTAGKM
TAHESPIENASPVPAQEQQTEEALNIPVGNSEKSYLCLRKRKRK</mol_seq>\n+            </sequence>\n+            <property datatype="xsd:string" ref="Compara:genome_db_name" applies_to="clade">xenopus_tropicalis</property>\n+          </clade>\n+        </clade>\n+      </clade>\n+    </clade>\n+    <property datatype="xsd:string" ref="Compara:gene_tree_stable_id" applies_to="phylogeny">ENSGT00390000003602</property>\n+  </phylogeny>\n+</phyloxml>\n'
b
diff -r 000000000000 -r 76b2c482f1e8 test-data/input.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.txt Thu Aug 11 14:29:50 2016 -0400
b
@@ -0,0 +1,2 @@
+ENSG00000157764
+ENSG00000248378
b
diff -r 000000000000 -r 76b2c482f1e8 test-data/out.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out.json Thu Aug 11 14:29:50 2016 -0400
[
@@ -0,0 +1,1 @@
+{"ENSG00000157764":{"source":"ensembl_havana","object_type":"Gene","logic_name":"ensembl_havana_gene","version":12,"species":"homo_sapiens","description":"B-Raf proto-oncogene, serine/threonine kinase [Source:HGNC Symbol;Acc:HGNC:1097]","display_name":"BRAF","assembly_name":"GRCh38","biotype":"protein_coding","end":140924764,"seq_region_name":"7","db_type":"core","strand":-1,"id":"ENSG00000157764","start":140719327},"ENSG00000248378":{"source":"havana","object_type":"Gene","logic_name":"havana","version":1,"species":"homo_sapiens","display_name":"RP11-5N11.5","assembly_name":"GRCh38","biotype":"lincRNA","end":31744451,"seq_region_name":"5","db_type":"core","strand":-1,"id":"ENSG00000248378","start":31743988}}
b
diff -r 000000000000 -r 76b2c482f1e8 test-data/sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequences.fasta Thu Aug 11 14:29:50 2016 -0400
b
b'@@ -0,0 +1,3435 @@\n+>ENSG00000157764 chromosome:GRCh38:7:140719327:140924764:-1\n+CGCCTCCCTTCCCCCTCCCCGCCCGACAGCGGCCGCTCGGGCCCCGGCTCTCGGTTATAA\n+GATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAA\n+CGGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGA\n+CCCTGCCATTCCGGAGGAGGTGAGTGCTGGCGCCACCCTGCCGCCCTCCCGACTCCGGGC\n+TCGGCGGCTGGCTGGTGTTTATTTTGGAAAGAGGCGGCGGTGGGGGCTTGATGCCCTCAG\n+CCACCTTCTCGGGCCAGCTCCGCGGGCTGGGAGGTGGGCATCGCCCCCGTGTCCCTCTCC\n+GTCATGCAGCGCCTTCCTACGTAAACACACACAATGGCCCGGGGGGTTTCCCTGGCCCCC\n+ACCCCAGATGTGGGGATTGGGGCAGCGGTGGTTGAGCGGGAGGCTATCAATAGGGGGCGA\n+AACTCAGGGTTGGTCCGAGAAGGTCACGATTGGCTGAAGTATCCAGCTCTGCATCTCTGT\n+GGGGTGGGGGCGGCGGCGGCCTCGACGTGGAGGATATAGGTTAGTTGCTGGGGCTGAGAC\n+AACAGCCCGAGTTACTGTCGCGTGTAATTCTTACATGGTCGTGGGGATGATGGGGCTCAT\n+CATTTCCTCTCTCCTCTCCCGGACTGCCCCCCTTCTCAGTCCGCTGCCCTTTTTCACTTT\n+TCTATTTGGGGATTTCTCTTCACCTGTTTTACCCAGCAAATTATTTTGATTTAGTCTTTA\n+CTTTTTCAATCCTAAATCGCAGTTTCCGATGCCTTTTCTGGTCTCTGGTCCTCTGTTCCT\n+AATGTTTGTCAGCGCTCTGTCGCTGATTGGTAACCCCCATTCTATTCCCATCTACCGCCC\n+GCTCATTTTCCAGTTGTCGGACCTGCCTGCCTTCTAACCCCAGCTCCCACTTAAGAGCAT\n+TTTTGCACTTCTCTTACCCTGGTCCTCTTGAGGCTCTGTACTTGATCTCACCACTCCCTA\n+ACATTGTTGTCTGTTGTTATCTTCACAAATCCTCCTGGACACTTTGGAGCTACTTGTTTT\n+CTGAGCCCAGAAGCTGTCAAGATTCCATCAGGTTTCACTTGGCTCTTTTCGCGCTTGCAC\n+TACTGGCACTTTTTGGCTAGTCGTCCATTGTGCATTCACACCTCTTTATTCCTACCCATT\n+TTTATAGGTCTGATTGATTTCTTAGTGTTGTCCTCCTTTTTGTCCTATTTTTTTCCTTTT\n+CCTTTTTCCTCTCCAGTCCTTGCTTCTCTCAGCCTGTTTTTGCATTAGTCAGCCTCTTAG\n+CACTGTGTCAAATTATTTACGTTTTTTTATTACATAAAATTTATTACAAATATTTGGTAT\n+TTTATTACAGAAAATAATACTTTATTATGCTTTACAAATAAGATATGGTATAATAATTGT\n+GGTTTACAGTTATTGATTAGGTAATGTGACTTACTCTGTTGACTTTGCTCGAAGTTCTCT\n+TTGCTACTTACTATTAACATCTAATTTCTCAATTCTCATAACATCTCATTCTCTCTGCAA\n+TTTTTTTTTTGCATCATCATCTTTGGAAATTCATCCAATATGCTTGCTTTATTCAGCATC\n+AGCTTGTTTATGATAATGTTTGTTTTCTACTCTTTATATCATCTTTGTTACATGCCCAAA\n+ATGTGTTCTGTACCATCATTTGATCTGTTCTAAAATTTCTCATTTTTAAGTTTCTTAAAA\n+TCATTCCACTTTTCAGTATGCATTTTTGCTTAGATCAGTTTCCTCTCATATCTGTTCCTT\n+TCCCCCAGCTTCTTGATTTCTAAGG
AGAAAGCTCTTCTCTACTTCAATTTCCTAGTTTAT\n+TCTGTTTCCCTTGTTTCCAGTTACCATTCATTTTGCCTTGTTTCCTGGCTTTTGGTACTT\n+AACTTTCTGAAGCTTCCTCTTTTCTTCTCCACACCTCCACGTTCCTTCTTATTTATAAAC\n+ATCTTTGTTTCCTTTGACATGGAAATTTATTTTTAGGATACATTGTTTTTAATGGATAAA\n+TACTAGGGGTCACATCTGCTGTCTGTTTTCTCCAGGAATCGGATATGCCTTTGTCTTAAC\n+CAGGCACAGGTGCCTCTGGATTTTATTTTACTCTGTAATAGATGTGTAGTTTTGTTGAAT\n+TGTATCTTGTTTGAAGACTACTACAGAGTGGAACAATGAGTGAAGTAATAAGTAGGGGTT\n+ATGAAATTGTAATTCTCTGATTATAAAATTGTTTATCTTGGGAACTTTGCTGCAGAGTTA\n+TTAGAACCGTTTGCAATTCTGTAAAGAAGGCTTTTGTGAAGTAAAATCTCTACCCTTCTA\n+TTTTATTTGAAAGGGCCAGATTGTTTGGAACTGTACCCCCTGAAGAGTCTGATTTAGTAA\n+GTGAGAGCGAGGGCCATGGATTTCTGTATTTGGCACATGTCTTGAGCAGTTCCCATGTAC\n+CAATCCTTGAGAACCTCTAGGCTAGCTGAATTTAAGTATAAATTGCCAGTAATTGGAAAG\n+CATATTCATATCTTCTGAAACTATAAGGATACTCTCATTTTACTTGGTTAAAAAACAAGT\n+GTTTCCTACTGTCCTCTTTACCCAGGTTTTAATGTTTAGTGGTGAACAGTAGTTTTCCCT\n+CTACATTTTTTTCTGAACTGATAATAAATGTATTTGGCTGGGAGGGTGACATTGATTAAA\n+AAATGTATCTCTTGAATGTAAATATCAGTATTACAGATGATAAAATAAATTCCTCCAAGA\n+AATAATTTTAAATTTGAAGTTGATATTCAGTGGAAACTGAAATGTGCTGTGGTCTTTTAT\n+TTGAAGTCTTCCTTACATTCACTTAAAGGGATCTTTTACTGCAAATTACATGGAAAGAAT\n+GAAAAGGTTTGCTTGTGTGTAATGACACATTTTATTCTGAAGATTTATTTTACCTAACAG\n+TAAAATGTAGGTTTTTTTTTTTTAAATAAAAGTTTCCCAGAGGGAAATTTCATCTAAAAA\n+AAAAGTCTGATTTCAAAGGGAAAGCAAGTCATTATCAAAAATTAGAAAACTATAAGTACA\n+AAAAGTAAAAAATCATCAGTAATTTTGCCACTAAGATATTATTACTATAGACATTTTGGT\n+GTATTCCATCTGTTCTTTTTTAATGCTTTTATAACACTATGTAGTTTTGTATTTTAAAAA\n+ACTTAAAGCAAAAATTTCTACGTATTATTAGACATACTGTGATTTATTTAACTAATCATT\n+TTTTTGGGGTGTTAGGTTGTTTTTAATTTTTTACTGCCATCAAACATCTTGAACATAGGA\n+TGTAGATTTTAGTCTTTAAAATATGTTGGGGAATGAACAAATTTCACATCCTGTATTTGT\n+AGTATTAATACTTTGTAGGTGCTCAAAATAGAATATTCTGGTAAATGATTAGTGCTTATT\n+AAATATTTATCAAATGAATGTACTTGTACTTTTGGCATTAAACATTAACATCTGACCATT\n+TATATTTACCTGATTTTTTTTCTATGGCCATATGGTATGAAATAGTGTATGGTATAAATT\n+AACCATATGGTATAATAAATACATTTTTTTAAGTGTGATACCAGAGTGATATTTATTAAC\n+TGTTCTTCCTGTGCTGTTTCTGTAGAAGGGAGCTTCTCACAATTGCATTAGAATTACAAT\n+TTTATTATGTTCTGTTTTCAAGATCTCTGATCGTCAGTCTTAAACTGTTTAATTATAATA\n+ATGTATTGA
CTAGGGAATATTCTGGGATATAATCTCCTTTATAATGAGGTCCACTGTATT\n+AAAATACATCTT'..b'GAGGGTAGAATGATTACTCTTTTTGCAAGATTCTCTTCTTTGTCCAAGT\n+TGGCATTGTTAGTGCTAGGAATACCAGCACCTTGAGACGAGCAGATTCCAACCATTAGGC\n+TATAAACACCATAGCCAGAGATGGAAGGTTTACTGTGAGTATGAACAGCAAATAGCTTAC\n+AGGTCATGAGTTGAAATGGTGTAGGTGAGGCTCTAGAAAAATACCTTGACAATTTGCCAA\n+ATGATCTTACTGTGCCTTCATGATGCAATAAAAAAGCTAACATTTTAGCAGAAATCAGTG\n+ATTTGTGAAGAGAGCAGCCACTCTGGTTTAACTCAGCTGTGTTAATAATTTTTAGAGTGC\n+AATTTAGACTGCATAGGTAAATGCACTAAAGAGTTTATAGCCAAAATCACATTTAACAAT\n+GAGAAAACACACAGGTAAATTTTCAGTGAACAAAATTATTTTTTTAAAGCACATAATCCC\n+TAGTATAGTCAGATATATTTATCACATAGAGCAACTAGGTTGCAAATATAGTTCAGTGAC\n+ATTTCTAGAGAAACTTTTTCTACTCCCATAGGCTCTTCAAAGCATGGAACTTTTATACAA\n+CAGAAATGTTGACAGAAATTGCTGTAGTTTAGGGTTGAAGTACTGTATGATGGGCAGCAA\n+TCATGTATTAACTTAGAAGGGGAAATTGAAATATAGGACCGAATTTGGTTTTATCAGTTT\n+CCAGAGTACTGCTGCCAACCTAGACACTGATTTTTCAGAGTTTGAAATGTAAATTTCTTC\n+CCGGGACTTGATTGCACATGAAGCTGGACTGCGTTAGTCATCCTGTCCCAAAGCGCTGTG\n+GGGGCCAGGGTGGAGGTCTCAAGGCATCCTTTATGACCTGGCCATTGGATGTAAAAGAAA\n+ACATATTCCATGCTGTGGTTCTTGTATCTTGTTTCATTCCTCACCATTGAAAGAGAAAGT\n+CCATGTATTGTCTCCAGCACATCCTTGAAATGTTATACTGGGATGGATTACTGATGCCCA\n+TCGGTAGTTGAGCCCCAGAAGAGGGTAGTAGCATCTCTGCCTCAGGTGATGATTTGTAGC\n+TTGGCCAGAGGAGAGCGGAGTCACCAGTATATCTGTGGTCCATGTTGCTAGCTCTGGTAA\n+AATTAAAAATACTGGTAAGATGTTTGTTTTATTAGTACACTAGACAGTAAGCTCTGTTTT\n+GTTGTTTTCAAATAACCTATTTTCACTTTTGTTTGGGCAAAGACATTTAAATTGAAATTC\n+AATTCTAATTTTTGTTAATTGTGGAAAGGGTAATTAACAGTTCCTATCAGGTATTTTTAA\n+TGTGGAAAAGGACAGAAACCCAACTCCTAAAATCTTAAATTAAGGTAACAGTGCTTTAAA\n+AAAAAAAAATGCATGGGGCAATTAGTCGGCAACTCAATGAGTGACTAAAGTACTTTTATT\n+TAACATCCACAACTTCAACTGTTAAGTTTTATTAATTACTAAATCAGCTTTATTAAAATG\n+TTGACATTTATTTAGCTATTTTGAATAATTATAGTGACTTGACGAGTGTGTATGAGGACA\n+CAGCCAATGTAAGCCAGTGTATCCATTTTTTAGAGGTGCATTTTTTTTTAAAGAATTCTG\n+TAGATAGAAGTGCTCTGAAAACAACTAAAATATGTTTATTCATGGTAGTATCAAAAAATG\n+TTTGTACAAACCATCTGCTTCTCCCGGCCAGCCGAGTTCATTCTCCAGCACCGTGACCGC\n+TGGTTCTCATGTACAGCACATATGCGGGAGAGTTGGCAGAAAATTTGTGAAGAGATGCCG\n+CAAAGGAAGGGTCTGTTGACGGGTGGGATTGGGGGTTTTGATGAAGTTGC
TTAGTCCTGG\n+TTTTGTTTTGAAAATTACTGCGTTGCATTTTTGTGTTAAGTTTTTGAACCCACGTGTGTT\n+TTGGTGGAGTATGAGTTGGAAGTCACTGCAAACTAGCATAAACAACAAAGCTCACAGAGT\n+AGGCACAGATGTAGAGAACAGAGACCAAAATGGGGTGAGGTGGCAGTAAATCTAGGATAG\n+GGAAAAATTAATGTGAGGGTGGGAAATAAACTGTAATTACCTGAAATCAAATGTAAGAGT\n+GCAATAAGTATGCTTTTTATTCTAAGCTGTGAACGGTTTTTTTAAGAATCATTCCTTCCT\n+AATACATTTGTGTATGTTCCATAGCTGATTAAAACCAGCTATATCAACATATAATGCCTT\n+TTTATTCATGTTAATGACCAACGTAAGTGGCTAGCCTTTATGTCTTATTTATCTTCATGT\n+TATGTTAGTTTACATACAGGGGTGTATGTCTCTGTGCTGTCCCCTTCTCCTGCCTTCATT\n+TTAAAATGCATCCATGGGTCCTCCGTGTTTCCTTTGGCCATGCCACATATATAGACTCAG\n+TTTGGCCTTCATGATATCGCCTGATTTTTGAGGACTGTATCACAGTGATATGTATTTGTG\n+GTAATCTCATTTGTTGGTTGTACATCTGATCCTTTCCTCAACATGGCAATTGCTGCCTTT\n+CCTAAGATAGGATCATACAACTGATCAGGGGATTGAATTTGATCATTCATCAACATGTGT\n+CTCTGAATTTTATTCAGTAGTTGTCATTGCTCTTTGGTTTAGACCAAGAAAAAGGAAATC\n+CCCCCTTTTCATGTATTCCTTGGTTTGAGGACATGACTCCTGTAAGGGAGAGGAAAGGGA\n+GATGCTTCCTGTTTGAACTGCAGTGAATTCACGGTTCCTGTTTCACCACTCCAAACCTTA\n+TGGCGACTCACACACACATTCCTCTTTTCTGTTACTGCCAAAGGTTCGGGTTTAGTACAC\n+TTCAGTTCCACTCAAGCATTGAAAAGGTTCTCGTGGAGTCTGGGGCGTGCCCAGTGAAAA\n+GATGGGGACTTTTTAATTGTCCACAGACCTCTCTATACCTGCTTTGCAAAAATTACAATG\n+GAGTAACTATTTTTAAAGCTTATTTTTCAATTCATAAAAAAGACATTTATTTTCAGTCAA\n+ATGGATGATGTCTCCCTCTTTTCCCCTATTCTCAATGTTTGCTTGAATCTTTTATTATTT\n+TTTTTAATTCTCCCCCATACCCACTTCCTGATACTTTGGTTCTCTTTCCTGCTCAGGTCC\n+CTTCATTTGTACTTTGGAGTTTTTCTCATGTAAATTTGTATAACAGAAAATATTGTTCAG\n+TTTGGATAGAAAGCATGGAGAATAAAAAAAGATAGCTGAAATTCAGATTGAAGAAATTTA\n+TTTCTGTGTAAAGTTATTTAAAAACTGTATTATATAAAAGGCAAAAAAAGTTCTATGTAC\n+TTGATGTGAATATGCGAATACTGCTATAATAAAGATTGACTGCATGGAGAAGTCTTCA\n+>ENSG00000248378 
chromosome:GRCh38:5:31743988:31744451:-1\n+TTGGAAGTGAATTAAGACCCTCTCTTGGATACCAGCTGTGAAAGAACGGACTTTTTTTAC\n+CTAGTAAGGATGTGACAGACCTGCTCCTGACCCTTCTTTGTTAGTGGCCAGTAAATATAC\n+GCAAGGCAAAGTCCCACCCTAGTTTTGAAAAAAGCCAAACTAACAACTCTGCCCAATTTC\n+AAGGAACCGATATAATTTTCAGTACATGAGTGTGATTTAGAAATGGAAACAACATCAGAG\n+CAGGGTTAAATCAACAACCAAGCCATAGACCTTAAAAGGACCAGACTTCATGGTTCAATG\n+ATGCCAAGAAAAAGATTCCACCTATAAACTCTTTAGAAAGTAACCACCTCAGAAAGTTCC\n+AGACAAACCAGTTCTGCTTTGCAGATCAGCCCTGTATAAAGCTCTTACCACATTAGTAAG\n+AAGTTACTTGTGCTTTGGTTACAAACAGGCACAATTAAAGGGAA\n+\n'
b
diff -r 000000000000 -r 76b2c482f1e8 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Aug 11 14:29:50 2016 -0400
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="requests" version="2.7">
+        <repository changeset_revision="7e330b122c8c" name="package_python_2_7_requests_2_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>