Repository 'ensembl_get_sequences'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/ensembl_get_sequences

Changeset 4:3b686142e9c2 (2018-04-13)
Previous changeset 3:bce784076824 (2017-03-24) Next changeset 5:0fa1d1cc417d (2019-10-31)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 95bab1105cf8a7b07c668f08f712399e8775a4ae
modified:
get_feature_info.py
get_sequences.xml
removed:
test-data/genetree.json
test-data/genetree.phyloxml
test-data/out.json
test-data/sequences.fasta
b
diff -r bce784076824 -r 3b686142e9c2 get_feature_info.py
--- a/get_feature_info.py Fri Mar 24 11:57:48 2017 -0400
+++ b/get_feature_info.py Fri Apr 13 09:43:51 2018 -0400
[
@@ -4,6 +4,7 @@
 
 import json
 import optparse
+from itertools import islice
 
 import requests
 from six.moves.urllib.parse import urljoin
@@ -31,13 +32,27 @@
 
 headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
 params = dict((k, getattr(options, k)) for k in ['format', 'expand'])
+
+first = True
+
+print('{')
+
 with open(options.input) as f:
-    ids = [line.strip() for line in f]
-data = {'ids': ids}
-r = requests.post(urljoin(server, ext), params=params, headers=headers,
-                  data=json.dumps(data))
+    while True:
+        ids = [line.strip() for line in islice(f, 50)]
+        if not ids:
+            break
+        if not first:
+            print(",")
+        data = {'ids': ids}
+        r = requests.post(urljoin(server, ext), params=params, headers=headers,
+                          data=json.dumps(data))
 
-if not r.ok:
-    r.raise_for_status()
+        if not r.ok:
+            r.raise_for_status()
 
-print(r.text)
+        print(r.text[1:-1])
+
+        first = False
+
+print('}')
b
diff -r bce784076824 -r 3b686142e9c2 get_sequences.xml
--- a/get_sequences.xml Fri Mar 24 11:57:48 2017 -0400
+++ b/get_sequences.xml Fri Apr 13 09:43:51 2018 -0400
b
@@ -42,7 +42,12 @@
                 <param name="expand_3prime" value="0" />
                 <param name="expand_5prime" value="0" />
                 <param name="type_selector" value="genomic" />
-                <output name="output" file="sequences.fasta" />
+                <output name="output" ftype="fasta">
+                    <assert_contents>
+                        <has_text text="ENSG00000157764" />
+                        <has_text text="ENSG00000248378" />
+                    </assert_contents>
+            </output>
           </test>
      </tests>
 
b
diff -r bce784076824 -r 3b686142e9c2 test-data/genetree.json
--- a/test-data/genetree.json Fri Mar 24 11:57:48 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1 +0,0 @@\n-{"tree":{"events":{"type":"speciation"},"branch_length":0,"children":[{"events":{"type":"speciation"},"branch_length":0.169242,"children":[{"events":{"type":"speciation"},"branch_length":0.117496,"children":[{"events":{"type":"speciation"},"branch_length":0.115666,"children":[{"events":{"type":"speciation"},"branch_length":0.090766,"children":[{"events":{"type":"speciation"},"branch_length":0.032298,"children":[{"events":{"type":"speciation"},"branch_length":0.289267,"children":[{"events":{"type":"speciation"},"branch_length":0.030535,"children":[{"events":{"type":"speciation"},"branch_length":0.088215,"children":[{"sequence":{"mol_seq":{"seq":"MAYKSGKRPTFFEVFKAHCSDSDLGPVSLDWFEELSSEAPPYEPKLLGEPEGPIGWFDQTFKTPKAKSSTDSQLASTPLIFKEQNTMPPFSSPGKELDQKKMETSRENLLSPSMAGRKTDQENQILASPHGICHNYTAASPAIVRNPCRTPQGSNIPGPYGSLFCTPKFLEIPTPKRISESLGAEVDPEMSWTSSLATPPTLGVTVIIARENDSISGAKQQDERAEIVLHNFLSEDDGYTAKNDTSLLSIPETVKLNARDDIKDLESEVLDGLFGETNSFEDSFNLPAESSGILLSPRALDAIEKCEIKIDEAQEKSDVLSEQHRRRKSTISQEVKAANWTEKSCCVEVKDSIIQNTNEDIMDSKDNCLLGREKELEYLRIAGNLQDNRTQKSSVNEKLVKDVLSSSSQWSQLNLSGLDVTHLEMSICSSPQSDSCREKGLEEKSVLMTKDDAVETSLLNTSGLRNAQELSSASLSENGSDTKISKNNPMSEITPVKPVCASPKLVKGYAHEEVSGMSFLNCSSFLIESTNVMEYSVVYNSTFSTHLKATSQSVVTDVLSHPLICSAASPDNCSDLHLRNSEKTLRKSNFKSLNMLSRLRKKSKRFIYTINNTLVYQEENVQKEVTSESPDNPVLTHLESDLHEFKDCQVATDGNQDCLLSAERQSNIKENNLNTLTIKVDIMDNSSDNSVNNRLKQELSESGKNAREYQPATSFKCLKASHTESEDTTDCLNSGRISNIKHKVLTSAYLMARRHSRLFPEDCCLRKGKNDTYTVSNVNSRAAVPRSPKGQPPQSSPSCSDCLIDMRHGTAFVTNSKFNNTLSHIKFGMNRVSSNSCNKISADKRRASDQLSVAECREIVAPLGINCLENNSTSLKQRGKEDVDENQETLSIKSSENPQAAAWNNESIEVAEEFLDCIDNSLNEVVSEEDRQVAPVYFNTKPIESLEHKGKSSGDLNACSSSLSFGGFQTASNKQIKFSESSIAKGKMLFKDIENEFFEASSMERVRNFSNRVQKENIFSSDLESKTGSTSSGLQTRCMQYIPRKVDLCKNSPRNQLSVQEPNQSLTASQEAEIAELSNILEETGSQFEFTQFRKQSNMIQSHIQQFGATNVENASEAGEDTNFYSTLKSENHVINDEYCSKLKNENECKMVEYEKEDTVVFHKNKRKVTFTNLDRNESRISSHESCPVPLRDSFSNFVGFTSAGGKKINICKAALTRSAELFKDLDDDNFLFKSSGTNTRCCNSDGHVSSNWNFLRCQTKEDEGGILCVPNIKSVGPISHHSEKKYAENISSPCKENTENWTEILSDNVDFSCTNGGYSASGMRNSPSSFKKPHQNCKNSDQFLNQGNSEVEGCLQEDTSYLICLGDNITSAEEHDLNVSDEMENLSPNQKEDRKQEDEHLLLNRQAADTDAVSISDSSFRSSLRDLNVQCGERDTDVSEKSSKQKTNSVSVEGEDSTYKNLFVSESEIKIGSNQRHQVPSEQEMDVDKNKVKGTYLTGFCTASGKKITIADGFLAKAEEFFSENNVDLGKDDNDCFEDCLRKCNKSYVKDRDLCMDSTAHCDADVLNFKDKLIPQEPGDRLKQTIEESPIIQAVNHDSIKVGAFINVDEDCERNLAAPCANKEAYVRPGKSEVESLPVHGNNSLSRTLLLEDRKQFAERDVEYSATKRDNSESKPDSSLKCATSLHLTKVSSHLADNSVPGGIIQTVSAEDSCKSNQSFLLPRGSVPRSTSPYLNCGNKEIDLKRLNEPCSNTDSFTNTVDNAHQEQSEFDLPEDETNLTCLQETSLNAESQKSDLKQVFSTAKGKAVSVSESALASIRQMFQTDCDASVKSEIETKSGTNQTAIAGSSSFSIHAGGPGFATFLDTRKSEMNVAAPHFINGNGNLIENNHQGANMFADADSVPGFQMQCFEQKSKLLGHFPVPDKQMEQSGPSGNLGFFSTASGKPVQLSEESLKKARQLFSEMEGSHSSGLQDAHLLEDVEKSTNHGEVFPREMQLLLPRGKENASTDKISSPALGFSTASGKQVTISESAYQKAKAILKEADGFLSSELGVTNELCEIKESGQHAEYLTGKVISESKTEKSCSEELDLKSIHPEKMKSLPSTHRVKITEYVPHSKRNSQSAPFKNSFEQEETRFFRKGELNLGIKAESESDLCSATSKAEINIFQTPKDYLKTEAVESAKAFMEDDLSDSGVQVKSAQSFGKMSDNFQNKPFGKRHLDEKDSHGEPPIKRQLLLEFEKMKIPPKSVKPLKSTPDGIFKDRRKFMYHVPLKPVTCRPLGTTKERQEVRNPTLTLPDQDLKGFKSIPAVFQHCALRQSSSGASGLFTPHKAVAKDSEETRSLCKSGKAVKTFVPPFKTKLTLSTGEQGGSKRCHSPIRNSVMEERELNQIPVEQNSAEARDHQSCILHAAVTDIENDNLVTSNMMANLHCARDLQEMRIKKKYRQNISPQPGSLYVTKTSARNRISLKTAVEEETPSFHSTEKLYTYGVSKHCIQVNSTNAESFQFLIEEFFSKEYLLAGNGIQLADGGWLIPTDEGKAGKKEFYRALCDTPGVDPKLITEAWVYNHYRWIVWKLAAMEVCFPHKFANRCLTPETVLLQLKYRYDLEIDKSKRSAIKKITERDDAAGKTLVLCVSKVLSLNTAVSPSNSNNNTEGEKAAAIIEVTDGWYGIRALLDPPLKAFLHRRRLTVGQKIIVHGAELIGSPNGCTPLEAPDSLMLKIAANSTRCARWYTKLGFHRDPRPFPLPLSSLYSEGGTVGCIDVVVQRTYPIQWMEKTSAGSYVFRNSRAEEREAAKHAEDQQKKLEALFAKIQAEFEKHEERNCRRAPRSRIVTRQQIHNLQDGAELYEAIQNAADPSYMEGYLSEDQLKALNAHKQLMNDKKQTRIREEFKKAVESAEQEKHGFSKRDVSTVWKLFVIDYRKQEKHRGVILSIWRPLLDVCSLLKEGSRYRICQLSTSQSKGRSDSTNVQLSATKKTRYLQLSVSQKMLQQIFFPRKALKFTSLLDPSYQPPCAEVDVVGVVISISRTGFSNMVYLSDESYNLVAIKIWADLRHFAIEDIVVRCSFIAASNLQWQSEFRSEIPVLLAGDLSAFSASPKENHLQEKFNELRRMIENVDSFCSDAESKLMNLLQRNCSLTPILPKRCGLECSSPSCNSGLYAEDRSSISSKIETKHPSPLSASTPNTKLFPQGSAITPSSAVSSENHPRNSKKRKAVDFLSCIPAPPPLTPLCSIISPSLKKAFQPPRRLGSQHSKLSKETNPNAGCVTPSRRLREAVQLPDNDLVADEELAMINTQALINTVPEEKKMDYVNEDSTRATNLSGDTRA'..b'FSTARGAKLKVSEKALEQARMFLNDVDSIGESQTPKLVPRSSGKHDVSMQSTRLQKTKDLCRGETSIKISQPDSGVANQISNGADITTFNSENVFQEQKSGNAAKEVCEKISPSETSMPQPQQGYGFQTASGKGVSVLPSALKKAKAIFKDCDSNIDNLQSTNMEERKTKLDVEIVKQTNALISNSKSVTFSDVEEFKTDLINNLDQEAPQKEVCELKGLQSEFSNLISSNGNCGFSTASGKKVSVSAEALQRAKDVLFESVDGFSCANVYKKTNQVVDIQLDSSSSGKHKGFCTAGGKKVAFSATGLQKAKNLFRGCEEESLTTEQNCKGLSNVLMLACNGVSLIPEPGNSSGNNVGFSTAGGRKMDISVTALQKANNLFKDCEEESLASRSLAHQGFTTASGKNVFVSEKALSEVRAVFAGCDETSFSLELKKLSVNNVGFSTAGGKKVTISDTSLQRTMNLFQDCEEESLGSRSLKHQGCKGFTTASGKNVTVSEKALSEVRAVFAGCDEASFSHEPKNISGNKIGFSTTVEKMTTALEMPNNNNNFKDCEEESLASRNLMHQACKGFTTASGKNVTVSETALNEVKAVFAGCDEATFDLEPKKSLGSNIGFSTAGGKKVTISSTALQRAQTLFKDCEEEKEVFESEKAPLPTKSFHARSEDIVDGNLKFDQTNKKNPRLSTASGKVVSVTKVSLEETSTFFREFDNQNTATDNQLLLRDSSKHYPQHKDRQTKATLHPKAARNEPAHLDLHSLDFNSCTDTQQIYFEQEAMACTKALLEDDDLIESAGLISSEDIDNKRRPSFSDVQTIESVDQNRKRKRQVDGSSVADSGQPPLKRQLLSEFDRTLHAKTSGLTPLKSCPNGTLKDRRVFKYNVHLKPYVTSPVLFPVNQQSNNIEEHCSTESVQKRCNSDHMGGVFNPPFQKNMNPPTSNSQDASKVSSGIVLSFNVVNQEENPNSKEMDQIMAISKSHCDRGKQNCNEKNQDSKSKSSSSVQSIPLKIGNFDEKDMIALQESLQLARDMQDMRLRKKKRQTIRPVPGSLYLAKTSGVSRKSLRDAVGCTCPSQYTQDELSQHGVHHKVLEITSENAESFRFDCSDYFTCEHLMESGALQLADGGWLVPDSKGTVGKEEFFSALCDTPGVDPKLISDVWVFNHYRWIIWKRASMERTFPNLIGGLCLTPEQVLLQLKFRYDVEVDHSQRSALRRIMERDDTPAKTLVLCVCGIVQTCQNPEKTMKDDKSPSAKMESCVIWLTDGWYSIKSLLDPPLSAMLNKGRLKIGDKIVTSGAELVGSQEACPPLEAPESLMLKISANSTRRARWDTKLGYYRDPRPIRLLLSSLYASGGLVSCVNLLVLRSYPTQWMEKKPNSVFIFRNDRAEDREARKHSNSKHKSLDLLISKIQTQFEKEMEGKKKKRAQRRTFSRHEIETLQDGDELYEAMEQDAAVETRLSHKQMEAVSKYRCCREEKRQAELQERVQKAVMEAQEAEGGCPNRDVTPVWKLSVIDASDMQSNCVYTLNIWRPTRELQGLLTEGHRYRAYHLASSEGKKRSGVAHIQLTATKKTLFQDIEVSPEWLHQHFRARECVRFRELQNPHFSSPCGEVDIVGYIVSIEGKQGHCPVLHLVDENFDLVTVRTYSSLEQLAVEELVKPRALVAICNLQVRVLSGPVPSLYAGEQALFSINPKESYLQEAMAHLKTFAQNYEQFFNLAEEKVSDVVPSGVLGSFQSPRTPGVQPFPKMNGTVTPQQKSSIFSPFTPLNRRTPASTSNSEVKDSKNLKRRRGLDYLSRIPSPPPLIPLKTRASPCINKTFNPPRKSVTPKPPQNECSPASRPPAGEEKWVHDEELAMIDTQALVDGLMND","is_aligned":0},"location":"15:32054010-32070540","name":"brca2-001","id":[{"source":"EnsEMBL","accession":"ENSDARP00000099674"}]},"branch_length":0.529522,"id":{"source":"EnsEMBL","accession":"ENSDARG00000079015"},"confidence":{},"taxonomy":{"scientific_name":"Danio rerio","common_name":"Zebrafish","id":7955}}],"confidence":{"bootstrap":34},"taxonomy":{"scientific_name":"Otophysi","common_name":"Teleost fishes","timetree_mya":152.9,"id":186626}}],"confidence":{"bootstrap":9},"taxonomy":{"scientific_name":"Clupeocephala","common_name":"Teleost fishes","timetree_mya":265.5,"id":186625}},{"sequence":{"mol_seq":{"seq":"MIQNLDFARGMQEMRIRKKKRQTIRPLPGSLCLTKTSGVSRVSLRAAVGGKSPAQHTQQQLYVCGVNRCVLEISSENAESFRFSCRDHFGIEFFSAGNGVQLADGGCLIPDNKGTAGKEEFYRALCDTPGVDPKLISESWVYNHYRWIVWKRAAMERAFPLEMGSRCLTPEQVLLQLKYRYDLEIDNSQRSALRKIMERDDTPAKTLVLCVCRIVSMGSLQAHDTPGNKALPYAKAKTDGPVGVIEVTDGWYAIKALLDAPLTAILRKGRLAVGGKIVTHGADLIGCQDACSPLEAPEALMLKICANSTRLARWDTKLGFHRDPRPFHVPLSSLFSNGGRVGCVDVVVLRTYPIQWMEKKADGVFVFRGDRAEEREARRHNENKQKTMEALFAKIQSEFEKEQEGKKKSNRRQRFSHQEIQALQEGEELYEATESDPVCLEACLSEQQLMTLNNYRRALNERKQTKLQEEFQKAIRSAQDRENSCPERDVTPVWKLLVVDCKNLQNNTAYFLNIWRPSTEVCSLLKEGCRYKIYQLATSETKRRIGNAAVQLTAMKKTQFEQLQVPPELLCQLYASREAVSFRSLLSPRFQPVCGEVDLVGYVISITGKQGGAPVVYLVNENRDFVAVKCWVGLSQLALEDIVQPRALLAVSNVQARPSPAAAVPTAYAAELSVFSANPKETHLQAACTLLRSAAQGIECFFEAAEEKLSKLIKEDFPCQSLKDLSSIPKTPIMKPDVQRESSNLMISFILLLPSPQVPGRSPQQSLCAFEHLTPTCGKPPLHAGRSDDKGPKSLKRKRGLIYLSRIPSPPPLAPLRSPCVNKTFQPPRRCASPLAAARGKEECRSPGPAPEAGGEWVKDEELAQINTQDL","is_aligned":0},"location":"LG3:13357730-13380925","id":[{"source":"EnsEMBL","accession":"ENSLOCP00000009962"}]},"branch_length":0.204682,"id":{"source":"EnsEMBL","accession":"ENSLOCG00000008205"},"confidence":{},"taxonomy":{"scientific_name":"Lepisosteus oculatus","common_name":"Spotted gar","id":7918}}],"confidence":{"bootstrap":87},"taxonomy":{"scientific_name":"Neopterygii","common_name":"Ray-finned fishes","timetree_mya":333.8,"id":41665}}],"confidence":{},"taxonomy":{"scientific_name":"Euteleostomi","common_name":"Bony vertebrates","timetree_mya":441,"id":117571}},"rooted":1,"id":"ENSGT00390000003602","type":"gene tree"}\n'
b
diff -r bce784076824 -r 3b686142e9c2 test-data/genetree.phyloxml
--- a/test-data/genetree.phyloxml Fri Mar 24 11:57:48 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1485 +0,0 @@\n-<?xml version="1.0" encoding="UTF-8"?>\n-\n-<phyloxml xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org">\n-  <phylogeny rooted="true" type="gene tree">\n-    <clade branch_length="0">\n-      <taxonomy>\n-        <id>117571</id>\n-        <scientific_name>Euteleostomi</scientific_name>\n-        <common_name>Bony vertebrates</common_name>\n-      </taxonomy>\n-      <clade branch_length="0.169242">\n-        <confidence type="bootstrap">3</confidence>\n-        <taxonomy>\n-          <id>8287</id>\n-          <scientific_name>Sarcopterygii</scientific_name>\n-          <common_name>Lobe-finned fish</common_name>\n-        </taxonomy>\n-        <clade branch_length="0.117496">\n-          <confidence type="bootstrap">3</confidence>\n-          <taxonomy>\n-            <id>32523</id>\n-            <scientific_name>Tetrapoda</scientific_name>\n-            <common_name>Tetrapods</common_name>\n-          </taxonomy>\n-          <clade branch_length="0.115666">\n-            <confidence type="bootstrap">99</confidence>\n-            <taxonomy>\n-              <id>32524</id>\n-              <scientific_name>Amniota</scientific_name>\n-              <common_name>Amniotes</common_name>\n-            </taxonomy>\n-            <clade branch_length="0.090766">\n-              <confidence type="bootstrap">26</confidence>\n-              <taxonomy>\n-                <id>32561</id>\n-                <scientific_name>Sauria</scientific_name>\n-                <common_name>Reptiles and birds</common_name>\n-              </taxonomy>\n-              <clade branch_length="0.032298">\n-                <confidence type="bootstrap">26</confidence>\n-                <taxonomy>\n-                  <id>1329799</id>\n-                  <scientific_name>Archelosauria</scientific_name>\n-                  <common_name>Birds and turtles</common_name>\n-                </taxonomy>\n-                <clade branch_length="0.289267">\n-                  <confidence type="bootstrap">100</confidence>\n-                  <taxonomy>\n-                    <id>8825</id>\n-                    <scientific_name>Neognathae</scientific_name>\n-                    <common_name>Birds</common_name>\n-                  </taxonomy>\n-                  <clade branch_length="0.030535">\n-                    <confidence type="bootstrap">100</confidence>\n-                    <taxonomy>\n-                      <id>1549675</id>\n-                      <scientific_name>Galloanserae</scientific_name>\n-                      <common_name>Fowls</common_name>\n-                    </taxonomy>\n-                    <clade branch_length="0.088215">\n-                      <confidence type="bootstrap">100</confidence>\n-                      <taxonomy>\n-                        <id>9005</id>\n-                        <scientific_name>Phasianidae</scientific_name>\n-                        <common_name>Turkeys</common_name>\n-                      </taxonomy>\n-                      <clade branch_length="0.02756">\n-                        <name>ENSGALG00000017073</name>\n-                        <taxonomy>\n-                          <id>9031</id>\n-                          <scientific_name>Gallus gallus</scientific_name>\n-                          <common_name>Chicken</common_name>\n-                        </taxonomy>\n-                        <sequence>\n-                          <accession source="Ensembl">ENSGALP00000027524</accession>\n-                          <name>BRCA2-201</name>\n-                          <location>1:174560560-174597292</location>\n-                          <mol_seq is_aligned="0">MAYKSGKRPTFFEVFKAHCSDSDLGPVSLDWFEELSSEAPPYEPKLLGEPEGPIGWFDQTFKTPKAKSSTDSQLASTPLIFKEQNTMPPFSSPGKELDQKKMETSRENLLSPSMAGRKTDQENQILASPHGICHNYTAASPAIVRNPCRTPQGSNIPGPYGSLFCTPKFLEIPTPKRISESLGAEVDPEMSWTSSLATPPTLGVTVIIARENDSISGAKQQDERAEIVLHNFLSEDDGYTAKNDTSLLSIPETVKLNARDDIKDLESEVLDGLFGETNSFEDSFNLPAESSGILLSPRALDAIEKCEIKID'..b'SLLEEANSQYEFTQYKSTNIGSHNRTTEKEWDPDILNDIDFDDSFSCDVVKGKHPSKTNASSVNTSDLISFRSDLKEKQNGTVVLSVTEESALVDGMRSILSKSSDNSHFHTFGFKTAKGKAISVSEKSLNKAKHFFEEDYKEATFTGVINQEHFKTESSVKTCISTENNNAQTKQDVRLSCGEVNAAHNMINWQEAGNAGTEPNCLRDKTDVVCVDSNIHFGFSTARGAKLKVSEKALEQARMFLNDVDSIGESQTPKLVPRSSGKHDVSMQSTRLQKTKDLCRGETSIKISQPDSGVANQISNGADITTFNSENVFQEQKSGNAAKEVCEKISPSETSMPQPQQGYGFQTASGKGVSVLPSALKKAKAIFKDCDSNIDNLQSTNMEERKTKLDVEIVKQTNALISNSKSVTFSDVEEFKTDLINNLDQEAPQKEVCELKGLQSEFSNLISSNGNCGFSTASGKKVSVSAEALQRAKDVLFESVDGFSCANVYKKTNQVVDIQLDSSSSGKHKGFCTAGGKKVAFSATGLQKAKNLFRGCEEESLTTEQNCKGLSNVLMLACNGVSLIPEPGNSSGNNVGFSTAGGRKMDISVTALQKANNLFKDCEEESLASRSLAHQGFTTASGKNVFVSEKALSEVRAVFAGCDETSFSLELKKLSVNNVGFSTAGGKKVTISDTSLQRTMNLFQDCEEESLGSRSLKHQGCKGFTTASGKNVTVSEKALSEVRAVFAGCDEASFSHEPKNISGNKIGFSTTVEKMTTALEMPNNNNNFKDCEEESLASRNLMHQACKGFTTASGKNVTVSETALNEVKAVFAGCDEATFDLEPKKSLGSNIGFSTAGGKKVTISSTALQRAQTLFKDCEEEKEVFESEKAPLPTKSFHARSEDIVDGNLKFDQTNKKNPRLSTASGKVVSVTKVSLEETSTFFREFDNQNTATDNQLLLRDSSKHYPQHKDRQTKATLHPKAARNEPAHLDLHSLDFNSCTDTQQIYFEQEAMACTKALLEDDDLIESAGLISSEDIDNKRRPSFSDVQTIESVDQNRKRKRQVDGSSVADSGQPPLKRQLLSEFDRTLHAKTSGLTPLKSCPNGTLKDRRVFKYNVHLKPYVTSPVLFPVNQQSNNIEEHCSTESVQKRCNSDHMGGVFNPPFQKNMNPPTSNSQDASKVSSGIVLSFNVVNQEENPNSKEMDQIMAISKSHCDRGKQNCNEKNQDSKSKSSSSVQSIPLKIGNFDEKDMIALQESLQLARDMQDMRLRKKKRQTIRPVPGSLYLAKTSGVSRKSLRDAVGCTCPSQYTQDELSQHGVHHKVLEITSENAESFRFDCSDYFTCEHLMESGALQLADGGWLVPDSKGTVGKEEFFSALCDTPGVDPKLISDVWVFNHYRWIIWKRASMERTFPNLIGGLCLTPEQVLLQLKFRYDVEVDHSQRSALRRIMERDDTPAKTLVLCVCGIVQTCQNPEKTMKDDKSPSAKMESCVIWLTDGWYSIKSLLDPPLSAMLNKGRLKIGDKIVTSGAELVGSQEACPPLEAPESLMLKISANSTRRARWDTKLGYYRDPRPIRLLLSSLYASGGLVSCVNLLVLRSYPTQWMEKKPNSVFIFRNDRAEDREARKHSNSKHKSLDLLISKIQTQFEKEMEGKKKKRAQRRTFSRHEIETLQDGDELYEAMEQDAAVETRLSHKQMEAVSKYRCCREEKRQAELQERVQKAVMEAQEAEGGCPNRDVTPVWKLSVIDASDMQSNCVYTLNIWRPTRELQGLLTEGHRYRAYHLASSEGKKRSGVAHIQLTATKKTLFQDIEVSPEWLHQHFRARECVRFRELQNPHFSSPCGEVDIVGYIVSIEGKQGHCPVLHLVDENFDLVTVRTYSSLEQLAVEELVKPRALVAICNLQVRVLSGPVPSLYAGEQALFSINPKESYLQEAMAHLKTFAQNYEQFFNLAEEKVSDVVPSGVLGSFQSPRTPGVQPFPKMNGTVTPQQKSSIFSPFTPLNRRTPASTSNSEVKDSKNLKRRRGLDYLSRIPSPPPLIPLKTRASPCINKTFNPPRKSVTPKPPQNECSPASRPPAGEEKWVHDEELAMIDTQALVDGLMND</mol_seq>\n-              </sequence>\n-              <property datatype="xsd:string" ref="Compara:genome_db_name" applies_to="clade">danio_rerio</property>\n-            </clade>\n-          </clade>\n-        </clade>\n-        <clade branch_length="0.204682">\n-          <name>ENSLOCG00000008205</name>\n-          <taxonomy>\n-            <id>7918</id>\n-            <scientific_name>Lepisosteus oculatus</scientific_name>\n-            <common_name>Spotted gar</common_name>\n-          </taxonomy>\n-          <sequence>\n-            <accession source="Ensembl">ENSLOCP00000009962</accession>\n-            <location>LG3:13357730-13380925</location>\n-            <mol_seq is_aligned="0">MIQNLDFARGMQEMRIRKKKRQTIRPLPGSLCLTKTSGVSRVSLRAAVGGKSPAQHTQQQLYVCGVNRCVLEISSENAESFRFSCRDHFGIEFFSAGNGVQLADGGCLIPDNKGTAGKEEFYRALCDTPGVDPKLISESWVYNHYRWIVWKRAAMERAFPLEMGSRCLTPEQVLLQLKYRYDLEIDNSQRSALRKIMERDDTPAKTLVLCVCRIVSMGSLQAHDTPGNKALPYAKAKTDGPVGVIEVTDGWYAIKALLDAPLTAILRKGRLAVGGKIVTHGADLIGCQDACSPLEAPEALMLKICANSTRLARWDTKLGFHRDPRPFHVPLSSLFSNGGRVGCVDVVVLRTYPIQWMEKKADGVFVFRGDRAEEREARRHNENKQKTMEALFAKIQSEFEKEQEGKKKSNRRQRFSHQEIQALQEGEELYEATESDPVCLEACLSEQQLMTLNNYRRALNERKQTKLQEEFQKAIRSAQDRENSCPERDVTPVWKLLVVDCKNLQNNTAYFLNIWRPSTEVCSLLKEGCRYKIYQLATSETKRRIGNAAVQLTAMKKTQFEQLQVPPELLCQLYASREAVSFRSLLSPRFQPVCGEVDLVGYVISITGKQGGAPVVYLVNENRDFVAVKCWVGLSQLALEDIVQPRALLAVSNVQARPSPAAAVPTAYAAELSVFSANPKETHLQAACTLLRSAAQGIECFFEAAEEKLSKLIKEDFPCQSLKDLSSIPKTPIMKPDVQRESSNLMISFILLLPSPQVPGRSPQQSLCAFEHLTPTCGKPPLHAGRSDDKGPKSLKRKRGLIYLSRIPSPPPLAPLRSPCVNKTFQPPRRCASPLAAARGKEECRSPGPAPEAGGEWVKDEELAQINTQDL</mol_seq>\n-          </sequence>\n-          <property datatype="xsd:string" ref="Compara:genome_db_name" applies_to="clade">lepisosteus_oculatus</property>\n-        </clade>\n-      </clade>\n-    </clade>\n-    <property datatype="xsd:string" ref="Compara:gene_tree_stable_id" applies_to="phylogeny">ENSGT00390000003602</property>\n-  </phylogeny>\n-</phyloxml>\n'
b
diff -r bce784076824 -r 3b686142e9c2 test-data/out.json
--- a/test-data/out.json Fri Mar 24 11:57:48 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,1 +0,0 @@
-{"ENSG00000157764":{"source":"ensembl_havana","object_type":"Gene","logic_name":"ensembl_havana_gene","version":12,"species":"homo_sapiens","description":"B-Raf proto-oncogene, serine/threonine kinase [Source:HGNC Symbol;Acc:HGNC:1097]","display_name":"BRAF","assembly_name":"GRCh38","biotype":"protein_coding","end":140924764,"seq_region_name":"7","db_type":"core","strand":-1,"id":"ENSG00000157764","start":140719327},"ENSG00000248378":{"source":"havana","object_type":"Gene","logic_name":"havana","version":1,"species":"homo_sapiens","display_name":"RP11-5N11.5","assembly_name":"GRCh38","biotype":"lincRNA","end":31744451,"seq_region_name":"5","db_type":"core","strand":-1,"id":"ENSG00000248378","start":31743988}}
b
diff -r bce784076824 -r 3b686142e9c2 test-data/sequences.fasta
--- a/test-data/sequences.fasta Fri Mar 24 11:57:48 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,3435 +0,0 @@\n->ENSG00000157764 chromosome:GRCh38:7:140719327:140924764:-1\n-CGCCTCCCTTCCCCCTCCCCGCCCGACAGCGGCCGCTCGGGCCCCGGCTCTCGGTTATAA\n-GATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAA\n-CGGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGA\n-CCCTGCCATTCCGGAGGAGGTGAGTGCTGGCGCCACCCTGCCGCCCTCCCGACTCCGGGC\n-TCGGCGGCTGGCTGGTGTTTATTTTGGAAAGAGGCGGCGGTGGGGGCTTGATGCCCTCAG\n-CCACCTTCTCGGGCCAGCTCCGCGGGCTGGGAGGTGGGCATCGCCCCCGTGTCCCTCTCC\n-GTCATGCAGCGCCTTCCTACGTAAACACACACAATGGCCCGGGGGGTTTCCCTGGCCCCC\n-ACCCCAGATGTGGGGATTGGGGCAGCGGTGGTTGAGCGGGAGGCTATCAATAGGGGGCGA\n-AACTCAGGGTTGGTCCGAGAAGGTCACGATTGGCTGAAGTATCCAGCTCTGCATCTCTGT\n-GGGGTGGGGGCGGCGGCGGCCTCGACGTGGAGGATATAGGTTAGTTGCTGGGGCTGAGAC\n-AACAGCCCGAGTTACTGTCGCGTGTAATTCTTACATGGTCGTGGGGATGATGGGGCTCAT\n-CATTTCCTCTCTCCTCTCCCGGACTGCCCCCCTTCTCAGTCCGCTGCCCTTTTTCACTTT\n-TCTATTTGGGGATTTCTCTTCACCTGTTTTACCCAGCAAATTATTTTGATTTAGTCTTTA\n-CTTTTTCAATCCTAAATCGCAGTTTCCGATGCCTTTTCTGGTCTCTGGTCCTCTGTTCCT\n-AATGTTTGTCAGCGCTCTGTCGCTGATTGGTAACCCCCATTCTATTCCCATCTACCGCCC\n-GCTCATTTTCCAGTTGTCGGACCTGCCTGCCTTCTAACCCCAGCTCCCACTTAAGAGCAT\n-TTTTGCACTTCTCTTACCCTGGTCCTCTTGAGGCTCTGTACTTGATCTCACCACTCCCTA\n-ACATTGTTGTCTGTTGTTATCTTCACAAATCCTCCTGGACACTTTGGAGCTACTTGTTTT\n-CTGAGCCCAGAAGCTGTCAAGATTCCATCAGGTTTCACTTGGCTCTTTTCGCGCTTGCAC\n-TACTGGCACTTTTTGGCTAGTCGTCCATTGTGCATTCACACCTCTTTATTCCTACCCATT\n-TTTATAGGTCTGATTGATTTCTTAGTGTTGTCCTCCTTTTTGTCCTATTTTTTTCCTTTT\n-CCTTTTTCCTCTCCAGTCCTTGCTTCTCTCAGCCTGTTTTTGCATTAGTCAGCCTCTTAG\n-CACTGTGTCAAATTATTTACGTTTTTTTATTACATAAAATTTATTACAAATATTTGGTAT\n-TTTATTACAGAAAATAATACTTTATTATGCTTTACAAATAAGATATGGTATAATAATTGT\n-GGTTTACAGTTATTGATTAGGTAATGTGACTTACTCTGTTGACTTTGCTCGAAGTTCTCT\n-TTGCTACTTACTATTAACATCTAATTTCTCAATTCTCATAACATCTCATTCTCTCTGCAA\n-TTTTTTTTTTGCATCATCATCTTTGGAAATTCATCCAATATGCTTGCTTTATTCAGCATC\n-AGCTTGTTTATGATAATGTTTGTTTTCTACTCTTTATATCATCTTTGTTACATGCCCAAA\n-ATGTGTTCTGTACCATCATTTGATCTGTTCTAAAATTTCTCATTTTTAAGTTTCTTAAAA\n-TCATTCCACTTTTCAGTATGCATTTTTGCTTAGATCAGTTTCCTCTCATATCTGTTCCTT\n-TCCCCCAGCTTCTTGATTTCTAAGGAGAAAGCTCTTCTCTACTTCAATTTCCTAGTTTAT\n-TCTGTTTCCCTTGTTTCCAGTTACCATTCATTTTGCCTTGTTTCCTGGCTTTTGGTACTT\n-AACTTTCTGAAGCTTCCTCTTTTCTTCTCCACACCTCCACGTTCCTTCTTATTTATAAAC\n-ATCTTTGTTTCCTTTGACATGGAAATTTATTTTTAGGATACATTGTTTTTAATGGATAAA\n-TACTAGGGGTCACATCTGCTGTCTGTTTTCTCCAGGAATCGGATATGCCTTTGTCTTAAC\n-CAGGCACAGGTGCCTCTGGATTTTATTTTACTCTGTAATAGATGTGTAGTTTTGTTGAAT\n-TGTATCTTGTTTGAAGACTACTACAGAGTGGAACAATGAGTGAAGTAATAAGTAGGGGTT\n-ATGAAATTGTAATTCTCTGATTATAAAATTGTTTATCTTGGGAACTTTGCTGCAGAGTTA\n-TTAGAACCGTTTGCAATTCTGTAAAGAAGGCTTTTGTGAAGTAAAATCTCTACCCTTCTA\n-TTTTATTTGAAAGGGCCAGATTGTTTGGAACTGTACCCCCTGAAGAGTCTGATTTAGTAA\n-GTGAGAGCGAGGGCCATGGATTTCTGTATTTGGCACATGTCTTGAGCAGTTCCCATGTAC\n-CAATCCTTGAGAACCTCTAGGCTAGCTGAATTTAAGTATAAATTGCCAGTAATTGGAAAG\n-CATATTCATATCTTCTGAAACTATAAGGATACTCTCATTTTACTTGGTTAAAAAACAAGT\n-GTTTCCTACTGTCCTCTTTACCCAGGTTTTAATGTTTAGTGGTGAACAGTAGTTTTCCCT\n-CTACATTTTTTTCTGAACTGATAATAAATGTATTTGGCTGGGAGGGTGACATTGATTAAA\n-AAATGTATCTCTTGAATGTAAATATCAGTATTACAGATGATAAAATAAATTCCTCCAAGA\n-AATAATTTTAAATTTGAAGTTGATATTCAGTGGAAACTGAAATGTGCTGTGGTCTTTTAT\n-TTGAAGTCTTCCTTACATTCACTTAAAGGGATCTTTTACTGCAAATTACATGGAAAGAAT\n-GAAAAGGTTTGCTTGTGTGTAATGACACATTTTATTCTGAAGATTTATTTTACCTAACAG\n-TAAAATGTAGGTTTTTTTTTTTTAAATAAAAGTTTCCCAGAGGGAAATTTCATCTAAAAA\n-AAAAGTCTGATTTCAAAGGGAAAGCAAGTCATTATCAAAAATTAGAAAACTATAAGTACA\n-AAAAGTAAAAAATCATCAGTAATTTTGCCACTAAGATATTATTACTATAGACATTTTGGT\n-GTATTCCATCTGTTCTTTTTTAATGCTTTTATAACACTATGTAGTTTTGTATTTTAAAAA\n-ACTTAAAGCAAAAATTTCTACGTATTATTAGACATACTGTGATTTATTTAACTAATCATT\n-TTTTTGGGGTGTTAGGTTGTTTTTAATTTTTTACTGCCATCAAACATCTTGAACATAGGA\n-TGTAGATTTTAGTCTTTAAAATATGTTGGGGAATGAACAAATTTCACATCCTGTATTTGT\n-AGTATTAATACTTTGTAGGTGCTCAAAATAGAATATTCTGGTAAATGATTAGTGCTTATT\n-AAATATTTATCAAATGAATGTACTTGTACTTTTGGCATTAAACATTAACATCTGACCATT\n-TATATTTACCTGATTTTTTTTCTATGGCCATATGGTATGAAATAGTGTATGGTATAAATT\n-AACCATATGGTATAATAAATACATTTTTTTAAGTGTGATACCAGAGTGATATTTATTAAC\n-TGTTCTTCCTGTGCTGTTTCTGTAGAAGGGAGCTTCTCACAATTGCATTAGAATTACAAT\n-TTTATTATGTTCTGTTTTCAAGATCTCTGATCGTCAGTCTTAAACTGTTTAATTATAATA\n-ATGTATTGACTAGGGAATATTCTGGGATATAATCTCCTTTATAATGAGGTCCACTGTATT\n-AAAATACATCTT'..b'GAGGGTAGAATGATTACTCTTTTTGCAAGATTCTCTTCTTTGTCCAAGT\n-TGGCATTGTTAGTGCTAGGAATACCAGCACCTTGAGACGAGCAGATTCCAACCATTAGGC\n-TATAAACACCATAGCCAGAGATGGAAGGTTTACTGTGAGTATGAACAGCAAATAGCTTAC\n-AGGTCATGAGTTGAAATGGTGTAGGTGAGGCTCTAGAAAAATACCTTGACAATTTGCCAA\n-ATGATCTTACTGTGCCTTCATGATGCAATAAAAAAGCTAACATTTTAGCAGAAATCAGTG\n-ATTTGTGAAGAGAGCAGCCACTCTGGTTTAACTCAGCTGTGTTAATAATTTTTAGAGTGC\n-AATTTAGACTGCATAGGTAAATGCACTAAAGAGTTTATAGCCAAAATCACATTTAACAAT\n-GAGAAAACACACAGGTAAATTTTCAGTGAACAAAATTATTTTTTTAAAGCACATAATCCC\n-TAGTATAGTCAGATATATTTATCACATAGAGCAACTAGGTTGCAAATATAGTTCAGTGAC\n-ATTTCTAGAGAAACTTTTTCTACTCCCATAGGCTCTTCAAAGCATGGAACTTTTATACAA\n-CAGAAATGTTGACAGAAATTGCTGTAGTTTAGGGTTGAAGTACTGTATGATGGGCAGCAA\n-TCATGTATTAACTTAGAAGGGGAAATTGAAATATAGGACCGAATTTGGTTTTATCAGTTT\n-CCAGAGTACTGCTGCCAACCTAGACACTGATTTTTCAGAGTTTGAAATGTAAATTTCTTC\n-CCGGGACTTGATTGCACATGAAGCTGGACTGCGTTAGTCATCCTGTCCCAAAGCGCTGTG\n-GGGGCCAGGGTGGAGGTCTCAAGGCATCCTTTATGACCTGGCCATTGGATGTAAAAGAAA\n-ACATATTCCATGCTGTGGTTCTTGTATCTTGTTTCATTCCTCACCATTGAAAGAGAAAGT\n-CCATGTATTGTCTCCAGCACATCCTTGAAATGTTATACTGGGATGGATTACTGATGCCCA\n-TCGGTAGTTGAGCCCCAGAAGAGGGTAGTAGCATCTCTGCCTCAGGTGATGATTTGTAGC\n-TTGGCCAGAGGAGAGCGGAGTCACCAGTATATCTGTGGTCCATGTTGCTAGCTCTGGTAA\n-AATTAAAAATACTGGTAAGATGTTTGTTTTATTAGTACACTAGACAGTAAGCTCTGTTTT\n-GTTGTTTTCAAATAACCTATTTTCACTTTTGTTTGGGCAAAGACATTTAAATTGAAATTC\n-AATTCTAATTTTTGTTAATTGTGGAAAGGGTAATTAACAGTTCCTATCAGGTATTTTTAA\n-TGTGGAAAAGGACAGAAACCCAACTCCTAAAATCTTAAATTAAGGTAACAGTGCTTTAAA\n-AAAAAAAAATGCATGGGGCAATTAGTCGGCAACTCAATGAGTGACTAAAGTACTTTTATT\n-TAACATCCACAACTTCAACTGTTAAGTTTTATTAATTACTAAATCAGCTTTATTAAAATG\n-TTGACATTTATTTAGCTATTTTGAATAATTATAGTGACTTGACGAGTGTGTATGAGGACA\n-CAGCCAATGTAAGCCAGTGTATCCATTTTTTAGAGGTGCATTTTTTTTTAAAGAATTCTG\n-TAGATAGAAGTGCTCTGAAAACAACTAAAATATGTTTATTCATGGTAGTATCAAAAAATG\n-TTTGTACAAACCATCTGCTTCTCCCGGCCAGCCGAGTTCATTCTCCAGCACCGTGACCGC\n-TGGTTCTCATGTACAGCACATATGCGGGAGAGTTGGCAGAAAATTTGTGAAGAGATGCCG\n-CAAAGGAAGGGTCTGTTGACGGGTGGGATTGGGGGTTTTGATGAAGTTGCTTAGTCCTGG\n-TTTTGTTTTGAAAATTACTGCGTTGCATTTTTGTGTTAAGTTTTTGAACCCACGTGTGTT\n-TTGGTGGAGTATGAGTTGGAAGTCACTGCAAACTAGCATAAACAACAAAGCTCACAGAGT\n-AGGCACAGATGTAGAGAACAGAGACCAAAATGGGGTGAGGTGGCAGTAAATCTAGGATAG\n-GGAAAAATTAATGTGAGGGTGGGAAATAAACTGTAATTACCTGAAATCAAATGTAAGAGT\n-GCAATAAGTATGCTTTTTATTCTAAGCTGTGAACGGTTTTTTTAAGAATCATTCCTTCCT\n-AATACATTTGTGTATGTTCCATAGCTGATTAAAACCAGCTATATCAACATATAATGCCTT\n-TTTATTCATGTTAATGACCAACGTAAGTGGCTAGCCTTTATGTCTTATTTATCTTCATGT\n-TATGTTAGTTTACATACAGGGGTGTATGTCTCTGTGCTGTCCCCTTCTCCTGCCTTCATT\n-TTAAAATGCATCCATGGGTCCTCCGTGTTTCCTTTGGCCATGCCACATATATAGACTCAG\n-TTTGGCCTTCATGATATCGCCTGATTTTTGAGGACTGTATCACAGTGATATGTATTTGTG\n-GTAATCTCATTTGTTGGTTGTACATCTGATCCTTTCCTCAACATGGCAATTGCTGCCTTT\n-CCTAAGATAGGATCATACAACTGATCAGGGGATTGAATTTGATCATTCATCAACATGTGT\n-CTCTGAATTTTATTCAGTAGTTGTCATTGCTCTTTGGTTTAGACCAAGAAAAAGGAAATC\n-CCCCCTTTTCATGTATTCCTTGGTTTGAGGACATGACTCCTGTAAGGGAGAGGAAAGGGA\n-GATGCTTCCTGTTTGAACTGCAGTGAATTCACGGTTCCTGTTTCACCACTCCAAACCTTA\n-TGGCGACTCACACACACATTCCTCTTTTCTGTTACTGCCAAAGGTTCGGGTTTAGTACAC\n-TTCAGTTCCACTCAAGCATTGAAAAGGTTCTCGTGGAGTCTGGGGCGTGCCCAGTGAAAA\n-GATGGGGACTTTTTAATTGTCCACAGACCTCTCTATACCTGCTTTGCAAAAATTACAATG\n-GAGTAACTATTTTTAAAGCTTATTTTTCAATTCATAAAAAAGACATTTATTTTCAGTCAA\n-ATGGATGATGTCTCCCTCTTTTCCCCTATTCTCAATGTTTGCTTGAATCTTTTATTATTT\n-TTTTTAATTCTCCCCCATACCCACTTCCTGATACTTTGGTTCTCTTTCCTGCTCAGGTCC\n-CTTCATTTGTACTTTGGAGTTTTTCTCATGTAAATTTGTATAACAGAAAATATTGTTCAG\n-TTTGGATAGAAAGCATGGAGAATAAAAAAAGATAGCTGAAATTCAGATTGAAGAAATTTA\n-TTTCTGTGTAAAGTTATTTAAAAACTGTATTATATAAAAGGCAAAAAAAGTTCTATGTAC\n-TTGATGTGAATATGCGAATACTGCTATAATAAAGATTGACTGCATGGAGAAGTCTTCA\n->ENSG00000248378 chromosome:GRCh38:5:31743988:31744451:-1\n-TTGGAAGTGAATTAAGACCCTCTCTTGGATACCAGCTGTGAAAGAACGGACTTTTTTTAC\n-CTAGTAAGGATGTGACAGACCTGCTCCTGACCCTTCTTTGTTAGTGGCCAGTAAATATAC\n-GCAAGGCAAAGTCCCACCCTAGTTTTGAAAAAAGCCAAACTAACAACTCTGCCCAATTTC\n-AAGGAACCGATATAATTTTCAGTACATGAGTGTGATTTAGAAATGGAAACAACATCAGAG\n-CAGGGTTAAATCAACAACCAAGCCATAGACCTTAAAAGGACCAGACTTCATGGTTCAATG\n-ATGCCAAGAAAAAGATTCCACCTATAAACTCTTTAGAAAGTAACCACCTCAGAAAGTTCC\n-AGACAAACCAGTTCTGCTTTGCAGATCAGCCCTGTATAAAGCTCTTACCACATTAGTAAG\n-AAGTTACTTGTGCTTTGGTTACAAACAGGCACAATTAAAGGGAA\n-\n'