changeset 0:bcdd1a35e545 draft default tip

planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author portiahollyoak
date Fri, 22 Apr 2016 12:09:14 -0400
parents
children
files genbank_to_fasta.py genbank_to_fasta.xml test-data/genbank_input.txt test-data/output.fasta
diffstat 4 files changed, 542 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genbank_to_fasta.py	Fri Apr 22 12:09:14 2016 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+import argparse
+import doctest  # This will test if the functions are working
+
+
+def get_id(line):
+    """
+    Return the record name found on an ``ID`` line.
+
+    The name is the second whitespace-separated field, so any run of spaces
+    or tabs between the ``ID`` line code and the name is accepted.
+
+    line: one line of the input file.
+
+    Returns the record name, or None when the line's first field is not
+    exactly ``ID``.
+
+    Raises ValueError when the line is an ``ID`` line without a name.
+
+    >>> line = 'ID   TE    standard; DNA; INV; 7411 BP.'
+    >>> 'TE' == get_id(line)
+    True
+    >>> get_id('ID  TE standard; DNA; INV; 7411 BP.')
+    'TE'
+    >>> get_id('XX') is None
+    True
+
+    """
+    fields = line.split()
+    if not fields or fields[0] != "ID":
+        return None
+    if len(fields) < 2:
+        raise ValueError("ID line has no record name: %r" % line)
+    return fields[1]
+
+
+def get_seq(line):
+    """
+    Return the residues of one sequence line as a single unbroken string.
+
+    Every digit (such as the running position count at the end of the line)
+    and every whitespace character (spaces, tabs, line endings) is dropped;
+    all other characters are kept in their original order.
+
+    line: one sequence line of the input file.
+
+    >>> line = "AGTGACATAT TCACATACAA AACCACATAA CATAGAGTAA ACATATTGAA AAGCCGCATA        60"
+    >>> 'AGTGACATATTCACATACAAAACCACATAACATAGAGTAAACATATTGAAAAGCCGCATA' == get_seq(line)
+    True
+    >>> get_seq("")
+    ''
+
+    """
+    return "".join(
+        char for char in line if not (char.isdigit() or char.isspace())
+    )
+
+
+def make_seq_dictionary(input_file_handle):
+    """
+    Parse a multi-record file into a mapping of record ID to sequence.
+
+    A record's ID is taken from its ``ID`` line by ``get_id``.  Its sequence
+    is every line between the ``SQ`` header line and the ``//`` terminator,
+    cleaned by ``get_seq`` and concatenated.
+
+    input_file_handle: iterable of text lines (for example an open file).
+
+    Returns an ordered dictionary, in file order, mapping each ID to its
+    sequence string.  A record with no ``SQ`` section maps to "".  When an
+    ID occurs more than once, the last record replaces the earlier one.
+
+    Raises ValueError if an ``SQ`` section appears before any ``ID`` line.
+    """
+    # Imported here so the block needs no change to the file's import list.
+    # A plain dict does not keep insertion order on older Python versions.
+    from collections import OrderedDict
+
+    chunks_by_id = OrderedDict()  # record ID -> list of sequence pieces
+    current_id = None
+    in_sequence = False
+    for line in input_file_handle:
+        line = line.strip()  # strips any leading or trailing whitespace
+        if in_sequence:
+            # Inside a sequence section only the terminator is special, so
+            # residue lines that happen to begin with "ID" or "SQ" are
+            # still read as sequence.
+            if line.startswith("//"):
+                in_sequence = False
+            else:
+                chunks_by_id[current_id].append(get_seq(line))
+        elif line.startswith("ID"):
+            current_id = get_id(line)
+            chunks_by_id[current_id] = []  # a repeated ID starts over
+        elif line.startswith("SQ"):
+            if current_id is None:
+                raise ValueError("found an SQ section before any ID line")
+            in_sequence = True
+
+    # Join once per record instead of growing a string line by line.
+    seq_d = OrderedDict()
+    for record_id, chunks in chunks_by_id.items():
+        seq_d[record_id] = "".join(chunks)
+    return seq_d
+
+
+def write_seq_d_to_file(seq_d, output):
+    """
+    Write each entry of ``seq_d`` to ``output`` as a FASTA record: a
+    ``>ID`` header line followed by the whole sequence on a single line.
+
+    seq_d: mapping of record ID to sequence string.
+    output: writable text file object.
+    """
+    for name in seq_d:
+        output.write(">%s\n" % name)
+        output.write("%s\n" % seq_d[name])
+
+description = ("This script will extract ID names and sequences from a "
+               "multigenbank file and format them into a multifasta file.")
+
+
+def main():
+    """
+    Command-line entry point.
+
+    Reads the input file named on the command line, collects its records
+    with ``make_seq_dictionary`` and writes them to the output file with
+    ``write_seq_d_to_file``.
+    """
+    # 'description' must be passed by keyword: the first positional
+    # parameter of ArgumentParser is 'prog', the program name.
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("input", help="A multi-genbank file.")
+    parser.add_argument("output", help="Name of the output fasta file.")
+    args = parser.parse_args()
+
+    # Only the open() call is guarded, so a TypeError raised while parsing
+    # is not mistaken for an open() that rejects the 'encoding' argument
+    # (as the built-in open() of Python 2 does).
+    try:
+        input_file_handle = open(args.input, encoding="utf-8")
+    except TypeError:
+        input_file_handle = open(args.input)
+    with input_file_handle:
+        seq_d = make_seq_dictionary(input_file_handle)
+
+    with open(args.output, "w") as output:
+        write_seq_d_to_file(seq_d, output)
+
+
+# Run only when executed as a script, so importing the module (for example
+# to run its doctests) does not try to parse the importer's command line.
+if __name__ == "__main__":
+    main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genbank_to_fasta.xml	Fri Apr 22 12:09:14 2016 -0400
@@ -0,0 +1,58 @@
+<tool id="genbank_to_fasta" name="Convert multigenbank to multifasta" version="0.1.0">
+    <!-- Galaxy wrapper that runs genbank_to_fasta.py on one input dataset
+         and collects the FASTA file it writes. -->
+    <description></description>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <!-- The script path is quoted so a tool directory containing spaces
+         is still passed to python as a single argument. -->
+    <command>python "$__tool_directory__/genbank_to_fasta.py" "$input" "$output"</command>
+    <inputs>
+        <param format="txt" name="input" type="data" label="Multigenbank file"/>
+    </inputs>
+    <outputs>
+        <data format="fasta" name="output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="genbank_input.txt"/>
+            <output name="output" file="output.fasta"/>
+        </test>
+    </tests>
+    <help>
+
+This tool converts a multigenbank file into a multifasta file.
+
+-----
+
+**Example multi genbank file** ::
+
+    ID   DME9736    standard; DNA; INV; 7411 BP.
+    XX
+    CC   Derived from AJ009736 (e1371475) (Rel. 58, Last updated, Version 1).
+    CC   Takis Benos and Michael Ashburner, 1-Feb-1999.
+    CC   Any changes to original sequence record are annotated in an FT line.
+    XX
+    SQ   Sequence 7411 BP; 3047 A; 1363 C; 1109 G; 1892 T; 0 other;
+         GTGACATATC CATAAGTCCC TAAGACTTAA GCATATGCCT ACATACTAAT ACACTTACAA        60
+         CACATACACC CCAATACAAC ATACACTACT CCGGATGTAC CCAACAGATA CCAGATAAGA       120
+         ATAAGATTGT TATATGATCC TCGAGAATGG AAAAAACCCC AATTCTAGAT AAGTCACCCA       180
+         CTGGTAGACT AAACATCCGT CCCCTAATTT AAACAATTCC TTGCTTAAGC CTCACCCCAT       240
+    //
+    ID   DMIS176    standard; DNA; INV; 7439 BP.
+    XX
+    CC   Derived from X01472 (g8142) (Rel. 36, Last updated, Version 2).
+    CC   Takis Benos and Michael Ashburner, 20-Aug-1997.
+    CC   Any changes to original sequence record are annotated in an FT line.
+    XX
+    SQ   Sequence 7439 BP;  2985 A; 1512 C; 1048 G; 1894 T; 0 other;
+         AGTGACATAT TCACATACAA AACCACATAA CATAGAGTAA ACATATTGAA AAGCCGCATA        60
+         CGTAAACAAT AAGTGACCAC CATGCTAATG TGGATCAAAT AACAAAAATA TCCACTCTGC       120
+    //
+
+**Example output fasta file** ::
+
+   >DME9736
+   GTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAA....
+   >DMIS176
+   AGTGACATATTCACATACAAAACCACATAACATAGAGTAAACATATTGAAAAGCCGCATA
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genbank_input.txt	Fri Apr 22 12:09:14 2016 -0400
@@ -0,0 +1,388 @@
+ID   DME9736    standard; DNA; INV; 7411 BP.
+XX
+AC   AJ009736;
+XX
+DR   FLYBASE; FBgn0026065; Idefix.
+XX
+FT   source          AJ009736:1..7411
+FT   SO_feature      five_prime_LTR ; SO:0000425:1..600
+FT   SO_feature      three_prime_LTR ; SO:0000426:6841..7411
+FT   SO_feature      CDS ; SO:0000316:<988..2031
+FT                   /name="Idefix\gag"
+FT                   /db_xref="FLYBASE:FBgn0027381"
+FT                   /db_xref="SPTREMBL:O96739"
+FT                   /db_xref="NCBI_PROTEIN:CAA08806.1"
+FT                   /translation="ARKLKDIMAVPQLSETHLNQLLNQIKELNYYDGAPGKLSGFVNQV
+FT                   EQLLSLYPTQEARQAHVIYGAVKRLLVDSALEVVTQERANTWLDMKKALAMAFKDHRPY
+FT                   VTLIRQLEDISYPGSICKFIEKLETQYWIMFDKLELESDHVDKSNYTEMLNKTVKSVID
+FT                   RKLPDRIYMSLARKDIDTIYKLKQASMELGLYDAIPENHRSNRTEMNKRRNRGNYNQNN
+FT                   NQKYYNNRNHNYSNYYPSMNQNHNTQPPQNPTQPMTNQNQYSPRFIPNNQRGNYYAFRR
+FT                   DLTQAQQNNPLNNTLNFQPSTSNNINRQGPVKRQRESQSDQSRMDVNFHQAASDTQMIE
+FT                   KDIQVPM"
+FT   SO_feature      CDS ; SO:0000316:<1950..5402
+FT                   /name="Idefix\pol"
+FT                   /db_xref="FLYBASE:FBgn0027380"
+FT                   /db_xref="SPTREMBL:O96740"
+FT                   /db_xref="NCBI_PROTEIN:CAA08807.1"
+FT                   /translation="PKQDGCKFSSSCLGHSNDREGHTSPYVKIIHHNKNYKGMIDTGSS
+FT                   INIIRENFENLEEKEENLIVYTIKGPITLKRSIIIKPTSVCPSAQKFYIHKFSDNYDFL
+FT                   LGRKYLEDTKAKIDYANETVTLGSKVFKFLYEEKKGETASKCLDPQEKNDSALVDRTKP
+FT                   KMQKVKTAPKCLKPKHQQQKKETALPKCLISNVVKDTVDNDVTHLDPMSVDNDIVNFAI
+FT                   NNELRECNEYRLEHLNAEEVECLKKFLYEYRDIQYKEGENLTFTSTIKHVIQTQHEDPV
+FT                   YRKPYKYPQSVDQEVNKQIKEMIEQGIVRKSKSPYCSPIWVVPKKADASGKQKFRLVVD
+FT                   YRNLNEITVNDKFPIPRMDEILDKLGRCQYFTTIDLAKGFHQIQMDENSIAKTAFSTKH
+FT                   GHYEYTRMPFGLKNAPATFQRCMNNLLEDLIYKDCLVYLDDIIVYSTPLEEHILSLKKV
+FT                   FEKLRDANLKLQLDKCEFMKKETEFLGHIVTTNGIKPNPNKTKAITNFPLPKTPKQIKS
+FT                   FLGLCGFYRKFIPNFAKIVKPMTLKLKKGAIIDTKCKEYIESFEKLKVLITSDPILIYP
+FT                   DFSKPFSLTTDASNVAIGAVLSQNHKPVCYASRTLNEHEINYATIEKELLAIVWATKYF
+FT                   RSYLFGRPFEVLSDHKPLVWLNNIKEPNMKLQRWKIKLNEFDYKIKYLPGKENHVADAL
+FT                   SRTKIEVMVGEVANSADATIHSAIEDNLNYIPITERPINYFSRQIEIEKGDNDTTSVQH
+FT                   LFQKLKIKIVYKEMTPELAKNLIKEYVCTKKSAIYFPNDEDFLIFQRAFTEIISPNNFT
+FT                   KLLRCTTKLIDILTYAEFKDLILKKHKELLHPGIEKTINLFKEEYYYPDSQKLIQTIIN
+FT                   ECQICYLAKTEHQTQMTYETTPEIFNTREKYMIDFYLTGNQIFLSCIDIYSKFASLVEL
+FT                   KSRDWLEAKRAITKIFNDMGKPQEIKADKDSAFMCLALQNWLRSEGVQISISTSKNGIS
+FT                   DIERFHKTVNEKLRIIGSQQNVEDRCTKFERILYIYNHKTKHNSTKRFPADIFLYAGSP
+FT                   DFNVQQNKIDRIEYLNKNRHDFEVDIKYRQAPLVKSKITNPFKKTGRIGQVDDKHFEET
+FT                   NRGRKIVHYKSKFKKQKKFNKSKYDNSRPTKEAQSTQHTSNNA"
+FT   SO_feature      CDS ; SO:0000316:5248..6780
+FT                   /name="Idefix\env"
+FT                   /db_xref="FLYBASE:FBgn0027382"
+FT                   /db_xref="SPTREMBL:O96741"
+FT                   /db_xref="NCBI_PROTEIN:CAA08808.1"
+FT                   /translation="MINISKKQIVAGRSFTISQNLRNRKSLIRANMIIPDQPKKHKVHN
+FT                   ILLIMLSCILSLIITVKCNNIEVNPVNAKNGYLIFQTGTMEIPTSYEYHYLSINITKTM
+FT                   LMFEDIVSEANNYPNVPQIQYLVDKLKREINGLRIISRSKRGLLNVVGKAYKYLFGTLD
+FT                   EDDREELEEKINNMSEDSVKTHDLNTILDVINSGIDIINKLKVDKEQHQQIAVLIFNLE
+FT                   QFTEYIEDIELGLQLTRLGIFNPRLLKHDYLKHVNSEKMLKIKTSTWLKTDTNEILIIS
+FT                   HIPSEVTKVPIFQIVPYPDEHNYILTEQIFDKFYIFDNQVFHKDTNRDIFDKCIIGIIK
+FT                   QEQTQCKYIKTHKNYQINYIEPNILLTWNIPETAVNQDCTHNKILISGNNIIKIKNCTI
+FT                   QIDEFLISNNLADFTQTIYITNNVTRLEPINHLQTREMIETHVKHYNFFQIICITTFVI
+FT                   MIISLTLYVAYKFKNIPKKIIVNIVSKKNTRTLKIMSMKIFNKEIILPYTQI"
+XX
+CC   Derived from AJ009736 (e1371475) (Rel. 58, Last updated, Version 1).
+CC   Takis Benos and Michael Ashburner, 1-Feb-1999.
+CC   Any changes to original sequence record are annotated in an FT line.
+XX
+SQ   Sequence 7411 BP; 3047 A; 1363 C; 1109 G; 1892 T; 0 other;
+     GTGACATATC CATAAGTCCC TAAGACTTAA GCATATGCCT ACATACTAAT ACACTTACAA        60
+     CACATACACC CCAATACAAC ATACACTACT CCGGATGTAC CCAACAGATA CCAGATAAGA       120
+     ATAAGATTGT TATATGATCC TCGAGAATGG AAAAAACCCC AATTCTAGAT AAGTCACCCA       180
+     CTGGTAGACT AAACATCCGT CCCCTAATTT AAACAATTCC TTGCTTAAGC CTCACCCCAT       240
+     CGTCACATTC CCACGTTCAA AGCTCGGAGC CGCAATCCCG AAAAACAAAA GTATCGATTT       300
+     CAATAAACAA ATTATAAGAA TCTAAGAGCA CTTGTATCCA AGAGCAAATG CACTTGAATC       360
+     CAAGAGAAAC GCAAAGCTTT TTCTCTTTAC GATCAGAATC CTAAAGTCTA AAGTCCATAT       420
+     TAGAAAAGCT CGATACCGAG GCTTGAACGT CAACCAAATC AGAATAATTA TCAGAGTTCA       480
+     GTTTGAGACC TAATTGTAAA AGGTTCGGTG TTCTTCTCAA ATAAAAAGAT TGTAATCATT       540
+     TAGTGAAATA AAAATTATAT TTTTTTCACT TATAAATATT GCAAGTATTT AATTGGCGCA       600
+     GTCGGTTAGG ATCCAATAAA ATAAAAGAGT CCTTTTAGTA CGGTACTGAT CAACTGAAGG       660
+     ATATGCTATA CGACTAGCTA TCCAAGATCA GCGAATTAAA ATAGTGATTC AAAAATATTT       720
+     TTTAATCCGC AAAAGAATCT ACGTGAAAGT AGTATTCAAA ATAAAATCCC GTGCGGTCGG       780
+     AAACAAAAAT TAATTTAAAT TTTTTAATTC CGAAACTTAA AACCAAGTTT AAAGAAAACT       840
+     TAAAATCAAG AAAACTTAAA ACCAAGTTTA AAGAAAACTT AAAATCAAGA AAACTTAAAA       900
+     CCAAGTTTAA AGAAAACTTA AAATCAAGAA AACTTAAAAC CAAGTTTAAA GAAAACTCAA       960
+     AATCAAGAAA ACTTAAAGCC AAAATAAGCT AGAAAACTAA AAGACATCAT GGCAGTCCCA      1020
+     CAACTCTCAG AAACACACCT AAACCAACTG CTAAACCAAA TCAAAGAATT AAACTACTAC      1080
+     GATGGCGCAC CTGGCAAATT ATCTGGATTC GTCAACCAAG TGGAACAACT GCTCAGTTTA      1140
+     TACCCAACAC AGGAAGCAAG ACAGGCACAC GTCATATATG GAGCAGTGAA GCGGTTATTA      1200
+     GTGGATTCAG CCTTAGAAGT CGTAACCCAG GAAAGAGCTA ACACATGGCT GGACATGAAG      1260
+     AAAGCACTGG CAATGGCATT CAAAGACCAT AGACCTTATG TAACTCTCAT CAGACAATTA      1320
+     GAAGACATAT CATACCCAGG AAGTATCTGT AAGTTTATAG AAAAATTAGA AACACAATAC      1380
+     TGGATTATGT TCGATAAGTT AGAATTAGAA AGTGACCATG TTGATAAATC GAATTATACC      1440
+     GAAATGTTAA ACAAAACTGT TAAATCAGTA ATAGATCGAA AACTGCCGGA TAGAATTTAT      1500
+     ATGTCTTTGG CACGTAAAGA TATTGATACA ATTTATAAAT TAAAACAAGC ATCAATGGAA      1560
+     TTAGGCCTTT ATGATGCTAT TCCAGAAAAT CACCGTTCTA ATAGAACAGA AATGAATAAA      1620
+     CGTAGGAACA GGGGAAACTA TAATCAAAAT AATAATCAAA AATATTACAA TAATAGAAAT      1680
+     CACAACTACA GTAATTATTA TCCTAGCATG AATCAGAATC ATAATACACA ACCACCTCAG      1740
+     AATCCGACTC AACCTATGAC AAATCAAAAC CAATATTCAC CGCGTTTCAT ACCGAATAAT      1800
+     CAAAGAGGGA ATTATTATGC ATTTAGACGA GACTTAACAC AAGCTCAGCA GAACAACCCA      1860
+     CTTAATAACA CCCTTAACTT CCAACCTTCG ACATCGAATA ATATTAACAG ACAAGGGCCA      1920
+     GTAAAAAGAC AACGCGAGAG TCAGAGTGAC CAAAGCAGGA TGGATGTAAA TTTTCATCAA      1980
+     GCTGCCTCGG ACACTCAAAT GATAGAGAAG GACATACAAG TCCCTATGTA AAAATAATTC      2040
+     ATCATAATAA AAATTATAAG GGAATGATCG ATACAGGATC ATCAATTAAC ATCATAAGAG      2100
+     AAAATTTTGA GAACTTAGAA GAAAAGGAAG AAAACCTAAT AGTATACACT ATTAAAGGAC      2160
+     CAATAACACT AAAGAGAAGT ATAATAATAA AACCTACTTC AGTATGTCCG TCTGCTCAAA      2220
+     AATTCTACAT TCACAAATTT TCTGATAACT ATGATTTCTT GTTAGGTCGA AAGTATTTAG      2280
+     AAGATACAAA AGCTAAAATA GATTATGCTA ACGAAACAGT AACACTAGGC TCAAAAGTAT      2340
+     TTAAGTTTCT CTATGAAGAA AAGAAGGGCG AGACCGCATC CAAATGCCTT GACCCACAAG      2400
+     AAAAGAATGA TTCCGCTCTA GTGGACAGAA CCAAACCAAA AATGCAAAAG GTTAAGACCG      2460
+     CACCTAAGTG CCTTAAACCA AAGCATCAAC AGCAGAAGAA AGAGACCGCA TTACCCAAAT      2520
+     GCCTCATTTC AAATGTTGTT AAAGACACAG TGGACAATGA TGTAACACAT CTCGATCCCA      2580
+     TGTCCGTTGA CAACGATATA GTCAACTTCG CGATTAACAA TGAGTTACGC GAATGTAACG      2640
+     AGTATAGACT CGAACACTTA AATGCAGAGG AAGTTGAATG TTTAAAGAAG TTCCTATACG      2700
+     AATATAGAGA CATTCAGTAC AAAGAGGGCG AAAATTTGAC CTTCACCAGT ACTATTAAAC      2760
+     ATGTCATCCA GACTCAACAC GAAGACCCAG TATACCGTAA ACCCTACAAG TACCCTCAAA      2820
+     GCGTTGACCA AGAAGTTAAC AAACAAATTA AAGAAATGAT AGAACAAGGG ATTGTTCGCA      2880
+     AATCGAAGTC CCCTTATTGT TCTCCTATTT GGGTGGTCCC CAAGAAGGCA GACGCCTCTG      2940
+     GGAAACAAAA ATTCAGGTTG GTAGTCGATT ACAGGAACCT AAATGAGATA ACTGTTAACG      3000
+     ACAAATTTCC CATTCCCCGA ATGGATGAGA TATTGGACAA ACTAGGTAGA TGCCAATACT      3060
+     TTACCACTAT AGATCTAGCC AAGGGTTTTC ACCAAATCCA AATGGATGAA AATTCTATTG      3120
+     CAAAAACAGC TTTTTCAACT AAGCATGGGC ATTATGAATA TACTCGTATG CCCTTTGGTT      3180
+     TAAAAAACGC TCCAGCTACT TTTCAGAGAT GCATGAATAA TCTTCTGGAA GATTTAATCT      3240
+     ACAAAGACTG TTTAGTCTAT TTAGACGATA TTATTGTTTA TTCCACTCCA TTGGAAGAAC      3300
+     ACATTTTATC CCTAAAGAAA GTCTTTGAAA AACTGAGAGA CGCTAATTTA AAGTTGCAAC      3360
+     TAGATAAATG TGAATTCATG AAGAAAGAAA CTGAATTCCT AGGACACATC GTCACAACAA      3420
+     ATGGCATCAA ACCAAATCCA AATAAAACTA AAGCAATTAC AAATTTTCCA TTACCCAAGA      3480
+     CACCTAAGCA AATAAAATCA TTTTTGGGAT TATGTGGATT CTATCGCAAG TTTATTCCTA      3540
+     ACTTTGCCAA AATAGTTAAA CCCATGACCC TCAAATTAAA GAAAGGTGCT ATAATAGACA      3600
+     CCAAATGTAA AGAATACATC GAATCATTTG AAAAATTAAA AGTTTTGATA ACTTCAGACC      3660
+     CGATATTAAT CTATCCTGAT TTTTCAAAAC CTTTTTCTTT GACAACTGAT GCTAGCAACG      3720
+     TAGCTATTGG TGCAGTGTTA TCACAAAATC ACAAGCCAGT TTGTTATGCC AGTAGAACGC      3780
+     TAAACGAACA TGAAATCAAC TATGCTACGA TTGAAAAAGA ATTGTTAGCT ATAGTTTGGG      3840
+     CTACAAAATA TTTCAGGTCA TACTTATTCG GCAGACCATT TGAAGTATTA AGTGATCACA      3900
+     AGCCACTGGT ATGGCTCAAC AACATTAAAG AACCAAACAT GAAATTGCAA AGATGGAAAA      3960
+     TAAAACTTAA TGAATTCGAT TATAAAATCA AATATCTTCC AGGCAAAGAA AACCATGTCG      4020
+     CGGATGCTCT TTCCCGCACG AAAATAGAAG TTATGGTTGG CGAGGTCGCA AATAGCGCAG      4080
+     ACGCAACTAT ACACAGTGCC ATTGAAGATA ATCTAAATTA CATACCCATA ACAGAAAGAC      4140
+     CAATAAATTA CTTCTCTAGA CAAATAGAGA TAGAAAAAGG CGATAACGAT ACAACAAGTG      4200
+     TACAACATTT GTTTCAAAAA TTAAAGATTA AGATAGTCTA TAAAGAAATG ACACCTGAAC      4260
+     TCGCCAAAAA CCTCATTAAG GAATATGTGT GCACCAAAAA GAGTGCAATT TATTTCCCTA      4320
+     ATGACGAAGA TTTTCTGATC TTCCAGAGAG CGTTTACCGA AATTATAAGC CCTAACAATT      4380
+     TCACAAAACT CTTGAGATGT ACCACAAAGT TAATTGATAT ACTAACGTAT GCAGAATTCA      4440
+     AAGATTTAAT CTTAAAGAAA CATAAGGAAC TTTTACATCC GGGTATAGAA AAAACAATCA      4500
+     ATTTATTTAA AGAAGAATAT TACTATCCTG ATAGTCAAAA GCTTATTCAA ACCATTATCA      4560
+     ATGAATGTCA AATTTGTTAT CTAGCAAAAA CGGAACATCA AACACAAATG ACATATGAGA      4620
+     CTACACCAGA AATATTTAAC ACAAGAGAAA AATACATGAT AGATTTTTAT CTCACAGGAA      4680
+     ACCAGATCTT CTTATCTTGC ATTGATATCT ATTCGAAATT TGCATCACTA GTTGAATTAA      4740
+     AAAGTAGAGA TTGGCTAGAA GCAAAAAGAG CCATTACTAA AATATTCAAT GACATGGGAA      4800
+     AACCGCAAGA AATTAAAGCA GACAAAGACT CAGCTTTTAT GTGTTTAGCC TTACAAAATT      4860
+     GGTTAAGATC TGAAGGTGTA CAAATTTCTA TAAGCACTAG CAAAAATGGT ATATCTGATA      4920
+     TAGAAAGATT CCACAAGACC GTAAACGAAA AGCTAAGAAT CATTGGTAGC CAACAAAATG      4980
+     TTGAAGATAG GTGCACAAAA TTCGAAAGAA TTCTATACAT ATACAATCAC AAAACTAAAC      5040
+     ATAATAGTAC TAAAAGATTT CCAGCAGACA TTTTCCTATA TGCAGGCAGT CCAGATTTTA      5100
+     ATGTACAACA AAACAAAATC GATAGGATAG AATACCTCAA TAAGAATAGA CACGATTTTG      5160
+     AAGTTGATAT AAAATATAGA CAAGCCCCAC TTGTAAAAAG TAAAATAACC AATCCATTTA      5220
+     AAAAGACAGG AAGAATTGGA CAAGTAGATG ATAAACATTT CGAAGAACAA AATCGTGGCA      5280
+     GGAAGATCGT TCACTATAAG TCAAAATTTA AGAAACAGAA AAAGTTTAAT AAGAGCAAAT      5340
+     ATGATAATTC CAGACCAACC AAAGAAGCAC AAAGTACACA ACATACTTCT AATAATGCTT      5400
+     AGTTGCATAC TATCACTTAT CATCACGGTC AAGTGCAACA ATATAGAAGT AAATCCAGTA      5460
+     AACGCGAAAA ATGGATACCT TATATTCCAA ACAGGAACAA TGGAAATTCC AACCAGCTAT      5520
+     GAATACCATT ATTTAAGCAT AAACATAACA AAGACAATGC TCATGTTCGA AGATATAGTA      5580
+     AGTGAAGCAA ACAACTATCC TAATGTACCA CAAATACAAT ATTTAGTCGA CAAATTAAAA      5640
+     CGAGAAATAA ATGGGTTAAG AATTATTAGT CGAAGTAAAA GAGGTCTTTT AAACGTAGTA      5700
+     GGAAAAGCAT ACAAATACTT ATTCGGCACA TTAGATGAGG ATGACAGAGA AGAGTTAGAA      5760
+     GAAAAAATAA ACAACATGTC AGAAGACTCT GTAAAAACCC ATGACCTAAA CACGATTCTA      5820
+     GATGTAATCA ATAGTGGTAT AGATATAATT AATAAGCTCA AAGTAGATAA AGAACAACAC      5880
+     CAACAAATTG CGGTACTAAT ATTTAACCTA GAGCAATTTA CAGAATATAT AGAAGACATA      5940
+     GAATTGGGTC TGCAATTAAC CAGACTAGGA ATTTTCAATC CAAGATTACT AAAGCATGAC      6000
+     TATTTAAAAC ATGTAAATTC AGAAAAAATG CTAAAGATAA AAACGTCAAC CTGGCTTAAA      6060
+     ACAGACACGA ACGAAATTTT GATTATTTCC CATATTCCTA GCGAAGTTAC TAAAGTTCCA      6120
+     ATATTCCAAA TTGTTCCGTA CCCAGATGAA CATAATTATA TTCTAACCGA GCAAATATTC      6180
+     GATAAATTCT ACATATTTGA TAACCAAGTA TTCCATAAAG ATACCAATAG GGATATATTC      6240
+     GACAAATGTA TTATTGGAAT CATCAAACAA GAGCAAACTC AATGCAAATA TATTAAAACA      6300
+     CATAAAAATT ACCAAATAAA TTATATAGAA CCAAATATAC TATTAACATG GAATATTCCT      6360
+     GAAACAGCTG TTAACCAAGA CTGTACACAC AATAAAATAT TAATTTCAGG AAACAACATC      6420
+     ATTAAAATTA AAAATTGTAC CATACAAATA GATGAATTCT TAATCTCTAA TAATCTAGCA      6480
+     GACTTTACAC AAACAATTTA TATCACCAAC AATGTAACAC GTCTAGAACC AATAAATCAC      6540
+     TTACAAACGA GAGAAATGAT AGAAACCCAT GTAAAACACT ATAACTTTTT TCAAATTATA      6600
+     TGCATTACAA CGTTCGTCAT AATGATAATT AGTTTGACTC TGTATGTAGC ATATAAGTTT      6660
+     AAAAATATAC CTAAGAAAAT TATTGTCAAT ATCGTAAGCA AAAAGAACAC ACGCACCTTG      6720
+     AAAATAATGT CAATGAAAAT ATTCAACAAG GAAATAATAT TACCTTATAC CCAAATTTAA      6780
+     CGACCTGAGG ACAGGCCAAA TTCAAAGGTT GGGGGAGTGA CATATCCATA AGTCCCTAAG      6840
+     ACTTAAGCAT ATGCCTACAT ACTAATACAC TTACAACACA TACACCCCAA TACAACATAC      6900
+     ACTACTCCGG ATGTACCCAA CAGATACCAG ATAAGAATAA GATTGTTATA TGATCCTCGA      6960
+     GAATGGAAAA AACCCCAATT CTAGATAAGT CACCCACTGG TAGACTAAAC ATCCGTTCCC      7020
+     CTAATTTAAA CAATTCCTTG CTTAAGCCTC ACCCCATCGT CACATTCCCA CGTTCAAAGC      7080
+     TCGGAGCCGC AATCCCGAAA AACAAAAGTA TCGATTTCAA TAAACAAATT ATAAGAATCT      7140
+     AAGAGCACTT GTATCCAAGA GCAAATGCAC TTGAATCCAA GAGAAACGCA AAGCTTTTTC      7200
+     TCTTTACGAT CAGAATCCTA AAGTCTAAAG TCCATATTAG AAAAGCTCGA TACCGAGGCT      7260
+     TGAACGTCAA CCAAATCAGA ATAATTATCA GAGTTCAGTT TGAGACCTAA TTGTAAAAGG      7320
+     TTCGGTGTTC TTCTCAAATA AAAAGATTGT AATCATTTAG TGAAATAAAA ATTATATTTT      7380
+     TTTCACTTAT AAATATTGCA AGTATTTAAT T                                     7411
+//
+ID   DMIS176    standard; DNA; INV; 7439 BP.
+XX
+AC   X01472; J01060; J01061;
+XX
+DR   FLYBASE; FBgn0000004; 17.6.
+XX
+FT   source          X01472:1..7439
+FT   SO_feature      five_prime_LTR ; SO:0000425:1..512
+FT   SO_feature      three_prime_LTR ; SO:0000426:6928..7439
+FT   SO_feature      TATA_box ; SO:0000174:372..377
+FT   SO_feature      TATA_box ; SO:0000174:7271..7277
+FT   SO_feature      primer_binding_site ; SO:0005850:511..529
+FT   SO_feature      polyA_signal_sequence ; SO:0000551:372..377
+FT   SO_feature      polyA_signal_sequence ; SO:0000551:7299.7304
+FT   SO_feature      RR_tract ; SO:0000435:6917..6927
+FT   SO_feature      CDS ; SO:0000316:1074..2393
+FT                   /name="17.6\gag"
+FT                   /db_xref="FLYBASE:FBgn0044339"
+FT                   /db_xref="SWISS-PROT:P04282"
+FT                   /db_xref="NCBI_PROTEIN:CAA25701.1"
+FT                   /translation="MAQEPAIVPPLSDSNMTQVAYQIGNVEKFNGDPGSLYTFVSRIDY
+FT                   ILALYATGDERQQQIIFGHIERSISGEVMRCIGAYDMYTWQQLRRQLVLNYKPQTPNHV
+FT                   LLEEFRKTPFRGNVRAFLEEAESRRQTLTSKLELEQDLEEKTFYLKLIKSSIESLIEKL
+FT                   PTHIYLRINNHNIPDLRSLINLLQEKGMYEQINHTSTHVQKQNFSDKPQKSFNQNTNQS
+FT                   NNIRKYPTPFLHYNSPIPYQAPQIYQTPPTNNPLYRHPIPYHPNPNNVFQPSQQNNVFQ
+FT                   PSQQNNAFQPNQRTNFTSRPIFNTNRNNAFDQNRFGQQPQYQNQQSTQNSSSYVPNRPI
+FT                   KRLRPANSGQTGMSVDETLYQEDAFYQQCVPYDYFYYPTYDHSDYYPENQYQIDENNQN
+FT                   LQRTQQLQQINTDETNNDNQEPNVEQAENFQPQALENPNI"
+FT   SO_feature      CDS ; SO:0000316:2345..5518
+FT                   /name="17.6\pol"
+FT                   /db_xref="FLYBASE:FBgn0014453"
+FT                   /db_xref="SWISS-PROT:P04323"
+FT                   /db_xref="NCBI_PROTEIN:CAA25702.1"
+FT                   /translation="TGRKFSATSLGKPQYITIKYKENNLKCLIDTGSTVNMTSKNIFDL
+FT                   PIQNTSTFIHTSNGPLIVNKSIIIPSKILFPTTNEFLLHPFSENYDLLLGRKLLAEAKA
+FT                   TISYRDQEVTLYNNKYKLIEGIATHEQSHFQNVNMIPDTMLRQPNKISPILESDLYRLE
+FT                   HLNNEEKQRLCALLQKYHDIQYHEGDKLTFTNQTKHTINTKHNLPLYSKYSYPQAYEQE
+FT                   VESQIQDMLNQGIIRTSNSPYNSPIWVVPKKQDASGKQKFRIVIDYRKLNEITVGDRHP
+FT                   IPNMDEILGKLGRCNYFTTIDLAKGFHQIEMDPESVSKTAFSTKHGHYEYLRMPFGLKN
+FT                   APATFQRCMNDILRPLLNKHCLVYLDDIIVFSTSLDEHLQSLGLVFEKLAKANLKLQLD
+FT                   KCEFLKQETTFLGHVLTPDGIKPNPEKIEAIQKYPIPTKPKEIKAFLGLTGYYRKFIPN
+FT                   FADIAKPMTKCLKKNMKIDTTNPEYDSAFKKLKYLISEDPILKVPDFTKKFTLTTDASD
+FT                   VALGAVLSQDGHPLSYISRTLNEHEINYSTIEKELLAIVWATKTFRHYLLGRHFEISSD
+FT                   HQPLSWLYRMKDPNSKLTRWRVKLSEFDFDIKYIKGKENCVADALSRIKLEETYLSEQT
+FT                   QHSAEEDNSDLIFITERPLNTFNRQVIFSKGPPDIKVTKYFKKHITQIFYDIMTREKAE
+FT                   QYLIDHFCGKKSALYIESDADFEVIQAAHKLAINTKYTKILRSTILLKNITTYAEFKEL
+FT                   ILTAHEKLLHPGIQKTTKLFGETYYFPNSQLLIQNIINECSICNLAKTEHRNTDMPTKT
+FT                   TPKPEHCREKFMIDIYSSEGKHYVSCIDIYSKFATLEEIKTKDWIECKNALMRIFNQLG
+FT                   KPKLLKADRDGAFSSLALKRWLESEEVELQLNTTKTGVADIERLHKTINEKIRIIKTSD
+FT                   DEETKLSKMETVLNIYNHKTKHDTTGQTPAHIFLYAGQPILDTQQNKENKINKINNDRV
+FT                   EYEVDTRYRKGPLQKGKLENPFKPTKNVEQTDSDHYKITNRNRITHYYKTQFKKRKKNN
+FT                   QLSISQAPGT"
+FT   SO_feature      CDS ; SO:0000316:5488..6903
+FT                   /name="17.6\env"
+FT                   /db_xref="FLYBASE:FBgn0027624"
+FT                   /db_xref="SWISS-PROT:P04283"
+FT                   /db_xref="NCBI_PROTEIN:CAA25703.1"
+FT                   /translation="SALNFTGTWHLITLLLMLITTVHGQQIEINNIDTNHGYLLFSDKP
+FT                   VQIPSSFEHHCLRINLTEIDTIADYFEQRLRTDYHAPQVKFLYNKMRRELAGIALRHRN
+FT                   KRGLINIVGSVFKYLFGTLDENDRVDIQRKLETNAHNSVNLHELNDAIQLINDGMQKIQ
+FT                   NYENNSNIINSLLYELMQFTEYIEDVEMGMQLSRLGLFNPKLLNYDKLENVNSQNILNI
+FT                   KTSTWINYNDNQLLIISHIPINFSLINTVKIIPYPDSNGYQLEYTDTQSYFERENKVYN
+FT                   NENKEINNECVTNIIKHLKPICNFESIHTDEIIKYIEPNTIVTWNLTQTSLKQNCQNSF
+FT                   NNIKIKGNKMIKVTQCKIEINSIILSENLFKPEIDLTPLYTPLNITKIKTVKHNDINEM
+FT                   ISQNNITLYIFMTTVIIILILLYLYLRYVSFNPFMMLYAKLKLRKNQNQNTAQQIEMED
+FT                   VPLPLLYPSIPAQV"
+XX
+CC   Derived from X01472 (g8142) (Rel. 36, Last updated, Version 2).
+CC   Takis Benos and Michael Ashburner, 20-Aug-1997.
+CC   Any changes to original sequence record are annotated in an FT line.
+XX
+SQ   Sequence 7439 BP;  2985 A; 1512 C; 1048 G; 1894 T; 0 other;
+     AGTGACATAT TCACATACAA AACCACATAA CATAGAGTAA ACATATTGAA AAGCCGCATA        60
+     CGTAAACAAT AAGTGACCAC CATGCTAATG TGGATCAAAT AACAAAAATA TCCACTCTGC       120
+     ATTTTGACAC CCCCATACTG TATGCCATCT GCGCAGTATG CATTCTAATA AACAAATTCT       180
+     TTGACAGCGG CACTTAGCCA TTCTTGTAAA CAAATCTTAA AGTCTGCCTG CTCTCTCTGA       240
+     GGCTTCTCCT CCACTTAAGA ATCCAAGAGC AATGCTCTCC CAAAAACACT AACATATTCT       300
+     TTAAGCAAGC ACAGAGGCTT CTCCTCATTT TCACTTTCAT TTGATTTTCA GTCTTAAGCT       360
+     GAACGTTAAT CAATAAACAA CACAATCGAT ACCGAAATTT TGATTCGTTT TATTTTGGCA       420
+     AAACTCAATT TTCAGCGTTG GTCTTAGTTC ATATTCGGAA CGGTCCATTT AATAGACTCA       480
+     AAACTATTTA TTGCAACCAT TTATTTGCAA TTGGCGCAGT CGATGTGATC AGTGTTAAAG       540
+     TTCCTTGATG CGGTAACCAG ATTTGCCAAT TCCTGTGTTC TTTTTGTTCT CTGACAAAAG       600
+     TACCACGATA ACGGGCACCC ACGTGACGGT TAATATCGCT TTAAGTTTTT AATTAAACCT       660
+     CGACAATAAA GTGAAACCGA AAAATCACAA TTTGCCTAAA CAAACCTGAA TTTATTATCA       720
+     GGAAGACGCT ATTGAATTTG TGAGAGGCTG TAAATCCAAT TGGTTACCTC AAAGACCCAC       780
+     GAAAAAGCTA TAGTGCAACC CTTGCGAAAA TCAAAACCTA TCTTAAAAAA AAAAAAAAAA       840
+     TATAAATAAT AAATTAATAA GCGAAAATTA AAACGTATTA AAAGTAAGAA TAATAAATAA       900
+     ATAAGTGAAA ATTCTATATG ATAAAAATTA AAAATAAGAA TAATAAATAA AAAGACAACA       960
+     TTTTAAATTA AACAATATTA AAAAAATATA AAAATATTAA AAACTATATT AAAAAAAAAA      1020
+     AAAAAACAAA AAAACAAAAA AAAAAAAATA AATAAATAAT CCAAAAATCA AAAATGGCTC      1080
+     AAGAACCAGC AATTGTGCCA CCACTATCAG ACAGCAACAT GACCCAGGTT GCCTACCAGA      1140
+     TTGGCAATGT GGAGAAATTC AACGGTGATC CAGGCTCACT ATACACCTTT GTGAGTCGAA      1200
+     TTGATTACAT ACTGGCTCTT TATGCTACCG GAGATGAACG CCAACAGCAG ATCATATTTG      1260
+     GGCATATTGA ACGCAGCATC AGCGGAGAAG TTATGCGCTG CATTGGAGCC TATGACATGT      1320
+     ACACCTGGCA GCAGCTTAGA AGACAATTGG TACTCAACTA TAAACCCCAG ACCCCTAACC      1380
+     ACGTTCTTTT AGAAGAGTTT CGAAAGACCC CATTTCGAGG CAATGTACGA GCATTCCTGG      1440
+     AAGAAGCAGA AAGCCGCAGA CAAACACTTA CTAGTAAGCT TGAATTAGAG CAAGATCTTG      1500
+     AAGAAAAGAC TTTTTATTTG AAATTAATAA AATCCAGTAT AGAATCACTA ATTGAAAAAT      1560
+     TACCTACACA CATTTATTTA AGAATAAATA ACCACAACAT ACCAGATTTG CGATCACTTA      1620
+     TAAACCTTTT ACAAGAGAAG GGCATGTACG AACAAATAAA TCATACAAGT ACACATGTCC      1680
+     AAAAACAAAA TTTCTCTGAT AAGCCACAAA AGTCCTTTAA TCAAAATACT AATCAGTCTA      1740
+     ACAATATCAG AAAATATCCA ACACCTTTCC TACATTATAA TTCACCAATA CCATATCAAG      1800
+     CTCCACAAAT TTATCAAACA CCACCAACTA ATAACCCACT TTATCGTCAT CCAATACCCT      1860
+     ACCACCCTAA TCCAAACAAT GTTTTTCAAC CAAGCCAACA AAACAATGTT TTCCAACCAA      1920
+     GCCAACAAAA CAATGCTTTT CAACCAAATC AACGAACAAA CTTTACATCT CGACCAATTT      1980
+     TTAACACCAA TCGAAACAAT GCATTCGATC AGAATAGGTT CGGACAACAA CCCCAATATC      2040
+     AAAATCAACA ATCAACACAA AATTCAAGTT CCTATGTACC CAATCGACCA ATAAAACGAT      2100
+     TAAGACCAGC TAATAGTGGA CAGACTGGGA TGAGTGTTGA CGAAACATTA TATCAAGAGG      2160
+     ACGCTTTTTA TCAGCAGTGT GTTCCATATG ACTATTTTTA TTATCCAACT TACGACCATT      2220
+     CAGACTATTA TCCAGAAAAT CAATATCAAA TTGACGAAAA CAACCAAAAT TTACAAAGAA      2280
+     CACAACAGTT ACAGCAGATT AATACAGACG AGACAAACAA TGACAACCAA GAACCCAATG      2340
+     TTGAACAGGC CGAAAATTTT CAGCCACAAG CCTTGGAAAA CCCCAATATA TAACAATTAA      2400
+     ATACAAAGAA AATAATTTGA AATGCCTTAT TGATACCGGA TCAACAGTTA ACATGACATC      2460
+     TAAAAATATA TTTGATTTAC CAATCCAGAA TACTAGTACT TTTATTCATA CCAGCAATGG      2520
+     ACCGCTCATT GTCAACAAAA GTATAATCAT ACCTTCAAAG ATTTTGTTCC CAACAACAAA      2580
+     TGAATTTTTA TTGCACCCTT TCTCTGAGAA TTACGATCTT TTATTAGGAA GAAAACTTTT      2640
+     AGCAGAAGCA AAAGCAACAA TAAGTTACCG CGATCAAGAG GTAACTCTTT ACAACAACAA      2700
+     ATACAAATTA ATAGAAGGAA TAGCAACACA TGAACAGAGT CATTTTCAAA ATGTAAATAT      2760
+     GATACCTGAC ACCATGCTCA GACAGCCAAA TAAAATTTCA CCCATTTTAG AATCAGACCT      2820
+     ATACAGATTG GAACATTTAA ATAACGAAGA AAAACAAAGA TTGTGCGCAC TCCTGCAGAA      2880
+     ATACCATGAC ATACAGTACC ATGAAGGTGA TAAGTTGACA TTTACTAATC AAACCAAACA      2940
+     TACTATCAAT ACAAAGCACA ATCTACCACT TTACTCTAAA TACAGTTACC CACAGGCTTA      3000
+     TGAACAGGAG GTCGAAAGCC AAATACAAGA TATGCTAAAT CAAGGTATTA TACGTACCAG      3060
+     TAATTCACCT TACAATAGCC CCATCTGGGT GGTTCCAAAG AAACAAGATG CATCAGGCAA      3120
+     ACAGAAATTT AGAATTGTAA TAGACTACCG AAAATTAAAT GAAATAACAG TAGGAGACAG      3180
+     ACACCCAATC CCAAACATGG ACGAAATCTT GGGAAAATTG GGCAGATGTA ATTACTTCAC      3240
+     AACTATAGAC TTGGCAAAGG GTTTCCACCA GATCGAAATG GATCCAGAAT CAGTTTCAAA      3300
+     GACAGCCTTT TCTACCAAGC ACGGTCATTA TGAATATTTG CGCATGCCAT TCGGATTAAA      3360
+     AAACGCGCCA GCCACCTTTC AACGGTGCAT GAATGATATT TTAAGACCAC TCTTAAACAA      3420
+     ACACTGTCTT GTGTATTTGG ACGACATAAT TGTATTCTCG ACATCCCTTG ATGAACACCT      3480
+     GCAATCGCTC GGACTAGTTT TCGAAAAATT AGCAAAAGCC AACCTTAAAT TACAACTTGA      3540
+     CAAATGTGAG TTTCTCAAGC AAGAAACCAC ATTTTTAGGA CATGTTCTAA CACCAGATGG      3600
+     AATAAAACCA AACCCTGAAA AAATTGAAGC CATTCAAAAA TATCCAATTC CCACTAAACC      3660
+     AAAAGAAATA AAAGCTTTTC TTGGACTGAC AGGATATTAT CGTAAATTTA TTCCAAACTT      3720
+     TGCAGACATA GCCAAACCCA TGACTAAGTG TTTAAAAAAG AACATGAAAA TTGACACTAC      3780
+     CAACCCAGAA TATGACTCTG CATTTAAAAA ATTAAAATAT CTAATATCAG AAGACCCAAT      3840
+     TCTTAAAGTA CCCGACTTTA CAAAGAAATT CACTTTAACC ACAGACGCAA GTGATGTCGC      3900
+     TTTGGGGGCA GTACTGTCAC AAGATGGACA CCCACTTAGC TACATTAGCC GAACACTTAA      3960
+     TGAACACGAA ATAAATTACA GCACAATTGA AAAAGAACTC TTAGCAATTG TATGGGCGAC      4020
+     AAAGACTTTT CGACACTACC TACTTGGAAG ACACTTTGAA ATATCCAGTG ACCATCAACC      4080
+     ATTGAGCTGG TTGTACCGTA TGAAAGACCC AAATTCAAAA CTGACCCGAT GGAGAGTAAA      4140
+     ATTATCCGAA TTCGATTTTG ATATAAAATA TATAAAAGGA AAAGAAAATT GCGTGGCGGA      4200
+     TGCTCTGTCC AGAATAAAAC TTGAGGAGAC ATATTTGAGC GAACAAACCC AACATAGTGC      4260
+     AGAAGAGGAC AATAGTGATT TAATTTTTAT TACAGAAAGA CCTCTAAATA CATTTAACAG      4320
+     ACAAGTTATA TTTTCAAAAG GACCACCAGA CATTAAAGTT ACGAAATATT TCAAAAAACA      4380
+     CATCACCCAA ATATTTTACG ACATTATGAC CAGGGAAAAA GCCGAACAAT ATTTGATAGA      4440
+     CCATTTTTGT GGTAAGAAAA GTGCGTTGTA TATTGAGAGT GACGCTGATT TCGAAGTCAT      4500
+     TCAAGCCGCA CATAAATTAG CCATAAACAC CAAATATACA AAAATCCTGC GTAGCACGAT      4560
+     TTTGTTAAAA AACATAACCA CTTATGCGGA ATTTAAGGAA TTGATCTTGA CTGCTCATGA      4620
+     AAAACTTCTA CACCCAGGCA TACAGAAAAC TACTAAACTT TTCGGAGAAA CTTACTATTT      4680
+     CCCTAATAGC CAGCTACTTA TTCAGAATAT AATAAATGAG TGCAGTATTT GCAATCTGGC      4740
+     AAAAACAGAG CACCGAAATA CAGACATGCC AACGAAAACC ACACCCAAAC CAGAACATTG      4800
+     CCGCGAAAAA TTCATGATAG ACATTTACTC ATCCGAAGGC AAACATTACG TTAGTTGCAT      4860
+     AGACATTTAT TCGAAATTTG CCACATTAGA AGAAATAAAA ACAAAAGACT GGATAGAATG      4920
+     CAAAAACGCG CTTATGCGCA TATTCAACCA GCTTGGCAAG CCAAAGTTAC TAAAGGCGGA      4980
+     CAGAGACGGC GCATTTTCCA GTTTAGCCCT CAAGAGATGG CTGGAGAGTG AGGAAGTCGA      5040
+     ATTGCAGCTT AACACAACAA AAACTGGTGT GGCGGACATA GAAAGACTAC ATAAAACAAT      5100
+     TAATGAAAAG ATTCGCATAA TCAAAACATC CGATGACGAA GAAACCAAAT TGAGCAAAAT      5160
+     GGAAACAGTA CTTAACATAT ACAATCATAA AACCAAACAC GACACCACTG GACAGACCCC      5220
+     TGCACACATA TTTCTCTACG CTGGACAACC AATATTAGAT ACCCAACAAA ACAAAGAAAA      5280
+     CAAAATAAAC AAAATAAATA ATGACAGAGT GGAGTACGAA GTCGACACAA GATACAGAAA      5340
+     AGGTCCACTA CAGAAAGGCA AATTAGAAAA TCCTTTTAAG CCAACAAAAA ATGTGGAGCA      5400
+     GACTGACTCT GATCATTATA AAATTACTAA TAGAAATAGA ATTACTCACT ACTACAAAAC      5460
+     ACAATTCAAA AAACGAAAGA AAAATAATCA GCTCTCAATT TCACAGGCAC CTGGCACTTG      5520
+     ATAACATTGC TGCTGATGCT GATCACAACA GTTCATGGAC AACAAATTGA AATTAATAAT      5580
+     ATTGACACAA ACCACGGATA TCTCCTTTTT TCTGATAAAC CAGTCCAGAT ACCATCATCC      5640
+     TTTGAACATC ATTGCTTGAG AATCAATTTA ACTGAAATAG ACACCATAGC TGATTATTTT      5700
+     GAGCAAAGAC TACGTACCGA CTACCATGCA CCCCAGGTCA AATTTTTATA CAACAAAATG      5760
+     AGAAGAGAAC TAGCTGGAAT AGCCTTGCGA CATAGAAATA AACGGGGACT TATTAACATT      5820
+     GTAGGTTCAG TTTTTAAATA CCTATTTGGC ACACTTGACG AAAATGATCG AGTGGATATA      5880
+     CAGAGGAAAC TTGAAACAAA CGCCCATAAC TCGGTAAATT TACATGAACT CAATGACGCT      5940
+     ATTCAATTAA TAAATGACGG AATGCAAAAG ATACAGAATT ATGAAAACAA CAGCAACATC      6000
+     ATTAACAGTC TTTTATATGA ACTCATGCAG TTTACAGAAT ACATAGAAGA TGTGGAAATG      6060
+     GGAATGCAGC TTTCCAGACT CGGTCTATTT AATCCCAAAC TACTAAACTA CGATAAACTT      6120
+     GAGAATGTAA ACAGCCAAAA TATTTTAAAC ATTAAAACAT CCACTTGGAT TAATTACAAT      6180
+     GATAACCAAT TATTAATCAT ATCTCACATA CCTATTAACT TTTCATTAAT AAATACAGTA      6240
+     AAAATAATCC CTTACCCAGA CTCGAACGGC TATCAGCTAG AATACACAGA CACACAATCA      6300
+     TATTTTGAAA GAGAAAATAA AGTTTACAAT AACGAAAATA AAGAAATAAA CAATGAGTGT      6360
+     GTCACCAACA TTATTAAACA TTTAAAACCA ATTTGTAATT TTGAGTCAAT CCACACAGAT      6420
+     GAAATAATAA AATACATAGA ACCAAACACA ATTGTAACCT GGAATTTAAC CCAAACAAGT      6480
+     CTCAAACAAA ATTGTCAAAA TTCATTTAAT AATATAAAAA TAAAAGGAAA CAAAATGATA      6540
+     AAAGTAACCC AATGTAAAAT AGAAATCAAT AGCATAATTC TAAGTGAAAA TCTCTTTAAA      6600
+     CCAGAAATAG ATTTGACACC ATTATACACA CCACTTAACA TAACAAAAAT AAAAACTGTT      6660
+     AAACACAACG ACATTAATGA AATGATTTCA CAAAACAATA TTACACTTTA CATATTTATG      6720
+     ACTACTGTCA TCATTATACT TATTTTATTG TACTTATATT TAAGATACGT ATCATTTAAC      6780
+     CCATTCATGA TGCTGTATGC AAAACTAAAA TTAAGAAAAA ATCAAAATCA AAACACAGCA      6840
+     CAACAAATAG AAATGGAAGA CGTTCCATTA CCCCTACTAT ATCCATCAAT CCCAGCCCAA      6900
+     GTATAGGCTT CTCTTTAAGG GAAGGGAAGT GACATATTCA CATACAAAAC CACATAACGT      6960
+     AGAGTAAACA TATTGAAAAG CCGCATACGT CAACAATAAG TGACCACCAT GCTAATGTGG      7020
+     ATCAAATAAC AAAAATATCC ACTCTGCATT TTGACACCCC CATACTGTAT GCCATCTGCG      7080
+     CAGTATGCAT TCTAATAAAC AAATTCTTTG ACAGCGGCAC TTAGCCATTC TTGTAAACAA      7140
+     ATCTTAAAGT CTGCCTGCTC TCTCTGAGGC TTCTCCTCCA CTTAAGAATC CAAGAGCAAT      7200
+     GCTCTCCCAA AAACACTAAC ATATTCTTTA AGCAAGCACA GAGGCTTCTC CTCATTTTCA      7260
+     CTTTCATTTG ATTTTCAGTC TTAAGCTGAA CGTTAATCAA TAAACAACAC AATCGATACC      7320
+     GAAATTTTGA TTCGTTTTAT TTTGGCAAAA CTCAATTTTC AGCGTTGGTC TTAGTTCATA      7380
+     TTCGGAACGG TCCATTTAAT AGACTCAAAA CTATTTATTG CAACCATTTA TTTGCAATT       7439
+//
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fasta	Fri Apr 22 12:09:14 2016 -0400
@@ -0,0 +1,4 @@
+>DME9736
+GTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAACACATACACCCCAATACAACATACACTACTCCGGATGTACCCAACAGATACCAGATAAGAATAAGATTGTTATATGATCCTCGAGAATGGAAAAAACCCCAATTCTAGATAAGTCACCCACTGGTAGACTAAACATCCGTCCCCTAATTTAAACAATTCCTTGCTTAAGCCTCACCCCATCGTCACATTCCCACGTTCAAAGCTCGGAGCCGCAATCCCGAAAAACAAAAGTATCGATTTCAATAAACAAATTATAAGAATCTAAGAGCACTTGTATCCAAGAGCAAATGCACTTGAATCCAAGAGAAACGCAAAGCTTTTTCTCTTTACGATCAGAATCCTAAAGTCTAAAGTCCATATTAGAAAAGCTCGATACCGAGGCTTGAACGTCAACCAAATCAGAATAATTATCAGAGTTCAGTTTGAGACCTAATTGTAAAAGGTTCGGTGTTCTTCTCAAATAAAAAGATTGTAATCATTTAGTGAAATAAAAATTATATTTTTTTCACTTATAAATATTGCAAGTATTTAATTGGCGCAGTCGGTTAGGATCCAATAAAATAAAAGAGTCCTTTTAGTACGGTACTGATCAACTGAAGGATATGCTATACGACTAGCTATCCAAGATCAGCGAATTAAAATAGTGATTCAAAAATATTTTTTAATCCGCAAAAGAATCTACGTGAAAGTAGTATTCAAAATAAAATCCCGTGCGGTCGGAAACAAAAATTAATTTAAATTTTTTAATTCCGAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTCAAAATCAAGAAAACTTAAAGCCAAAATAAGCTAGAAAACTAAAAGACATCATGGCAGTCCCACAACTCTCAGAAACACACCTAAACCAACTGCTAAACCAAATCAAAGAATTAAACTACTACGATGGCGCACCTGGCAAATTATCTGGATTCGTCAACCAAGTGGAACAACTGCTCAGTTTATACCCAACACAGGAAGCAAGACAGGCACACGTCATATATGGAGCAGTGAAGCGGTTATTAGTGGATTCAGCCTTAGAAGTCGTAACCCAGGAAAGAGCTAACACATGGCTGGACATGAAGAAAGCACTGGCAATGGCATTCAAAGACCATAGACCTTATGTAACTCTCATCAGACAATTAGAAGACATATCATACCCAGGAAGTATCTGTAAGTTTATAGAAAAATTAGAAACACAATACTGGATTATGTTCGATAAGTTAGAATTAGAAAGTGACCATGTTGATAAATCGAATTATACCGAAATGTTAAACAAAACTGTTAAATCAGTAATAGATCGAAAACTGCCGGATAGAATTTATATGTCTTTGGCACGTAAAGATATTGATACAATTTATAAATTAAAACAAGCATCAATGGAATTAGGCCTTTATGATGCTATTCCAGAAAATCACCGTTCTAATAGAACAGAAATGAATAAACGTAGGAACAGGGGAAACTATAATCAAAATAATAATCAAAAATATTACAATAATAGAAATCACAACTACAGTAATTATTATCCTAGCATGAATCAGAATCATAATACACAACCACCTCAGAATCCGACTCAACCTATGACAAATCAAAACCAATATTCACCGCGTTTCATACCGAATAATCAAAGAGGGAATTATTATGCATTTAGACGAGACTTAACACAAGCTCAGCAGAACAACCCACTTAATAACACCCTTAACTTCCAACCTTCGACATCGAATAATATTAACAGACAAGGGCCAGTAAAAAGACAACGCGAGAGTCAGAGTGACCAAAGCAGGATGGATGTAAATTTTCATCAAGCTGCCTCGGACACTCAAA
TGATAGAGAAGGACATACAAGTCCCTATGTAAAAATAATTCATCATAATAAAAATTATAAGGGAATGATCGATACAGGATCATCAATTAACATCATAAGAGAAAATTTTGAGAACTTAGAAGAAAAGGAAGAAAACCTAATAGTATACACTATTAAAGGACCAATAACACTAAAGAGAAGTATAATAATAAAACCTACTTCAGTATGTCCGTCTGCTCAAAAATTCTACATTCACAAATTTTCTGATAACTATGATTTCTTGTTAGGTCGAAAGTATTTAGAAGATACAAAAGCTAAAATAGATTATGCTAACGAAACAGTAACACTAGGCTCAAAAGTATTTAAGTTTCTCTATGAAGAAAAGAAGGGCGAGACCGCATCCAAATGCCTTGACCCACAAGAAAAGAATGATTCCGCTCTAGTGGACAGAACCAAACCAAAAATGCAAAAGGTTAAGACCGCACCTAAGTGCCTTAAACCAAAGCATCAACAGCAGAAGAAAGAGACCGCATTACCCAAATGCCTCATTTCAAATGTTGTTAAAGACACAGTGGACAATGATGTAACACATCTCGATCCCATGTCCGTTGACAACGATATAGTCAACTTCGCGATTAACAATGAGTTACGCGAATGTAACGAGTATAGACTCGAACACTTAAATGCAGAGGAAGTTGAATGTTTAAAGAAGTTCCTATACGAATATAGAGACATTCAGTACAAAGAGGGCGAAAATTTGACCTTCACCAGTACTATTAAACATGTCATCCAGACTCAACACGAAGACCCAGTATACCGTAAACCCTACAAGTACCCTCAAAGCGTTGACCAAGAAGTTAACAAACAAATTAAAGAAATGATAGAACAAGGGATTGTTCGCAAATCGAAGTCCCCTTATTGTTCTCCTATTTGGGTGGTCCCCAAGAAGGCAGACGCCTCTGGGAAACAAAAATTCAGGTTGGTAGTCGATTACAGGAACCTAAATGAGATAACTGTTAACGACAAATTTCCCATTCCCCGAATGGATGAGATATTGGACAAACTAGGTAGATGCCAATACTTTACCACTATAGATCTAGCCAAGGGTTTTCACCAAATCCAAATGGATGAAAATTCTATTGCAAAAACAGCTTTTTCAACTAAGCATGGGCATTATGAATATACTCGTATGCCCTTTGGTTTAAAAAACGCTCCAGCTACTTTTCAGAGATGCATGAATAATCTTCTGGAAGATTTAATCTACAAAGACTGTTTAGTCTATTTAGACGATATTATTGTTTATTCCACTCCATTGGAAGAACACATTTTATCCCTAAAGAAAGTCTTTGAAAAACTGAGAGACGCTAATTTAAAGTTGCAACTAGATAAATGTGAATTCATGAAGAAAGAAACTGAATTCCTAGGACACATCGTCACAACAAATGGCATCAAACCAAATCCAAATAAAACTAAAGCAATTACAAATTTTCCATTACCCAAGACACCTAAGCAAATAAAATCATTTTTGGGATTATGTGGATTCTATCGCAAGTTTATTCCTAACTTTGCCAAAATAGTTAAACCCATGACCCTCAAATTAAAGAAAGGTGCTATAATAGACACCAAATGTAAAGAATACATCGAATCATTTGAAAAATTAAAAGTTTTGATAACTTCAGACCCGATATTAATCTATCCTGATTTTTCAAAACCTTTTTCTTTGACAACTGATGCTAGCAACGTAGCTATTGGTGCAGTGTTATCACAAAATCACAAGCCAGTTTGTTATGCCAGTAGAACGCTAAACGAACATGAAATCAACTATGCTACGATTGAAAAAGAATTGTTAGCTATAGTTTGGGCTACAAAATATTTCAGGTCATACTTATTCGGCAGACCATTTGAAGTATTAAGTGATCACAAGCCACTGGTATGGCTCAACAACATTAAAGAACCAAACATGAAATTGCAAAGATGGAAAATAAAACTTAATGAATTCGATTATAAAATCAAATATCTTC
CAGGCAAAGAAAACCATGTCGCGGATGCTCTTTCCCGCACGAAAATAGAAGTTATGGTTGGCGAGGTCGCAAATAGCGCAGACGCAACTATACACAGTGCCATTGAAGATAATCTAAATTACATACCCATAACAGAAAGACCAATAAATTACTTCTCTAGACAAATAGAGATAGAAAAAGGCGATAACGATACAACAAGTGTACAACATTTGTTTCAAAAATTAAAGATTAAGATAGTCTATAAAGAAATGACACCTGAACTCGCCAAAAACCTCATTAAGGAATATGTGTGCACCAAAAAGAGTGCAATTTATTTCCCTAATGACGAAGATTTTCTGATCTTCCAGAGAGCGTTTACCGAAATTATAAGCCCTAACAATTTCACAAAACTCTTGAGATGTACCACAAAGTTAATTGATATACTAACGTATGCAGAATTCAAAGATTTAATCTTAAAGAAACATAAGGAACTTTTACATCCGGGTATAGAAAAAACAATCAATTTATTTAAAGAAGAATATTACTATCCTGATAGTCAAAAGCTTATTCAAACCATTATCAATGAATGTCAAATTTGTTATCTAGCAAAAACGGAACATCAAACACAAATGACATATGAGACTACACCAGAAATATTTAACACAAGAGAAAAATACATGATAGATTTTTATCTCACAGGAAACCAGATCTTCTTATCTTGCATTGATATCTATTCGAAATTTGCATCACTAGTTGAATTAAAAAGTAGAGATTGGCTAGAAGCAAAAAGAGCCATTACTAAAATATTCAATGACATGGGAAAACCGCAAGAAATTAAAGCAGACAAAGACTCAGCTTTTATGTGTTTAGCCTTACAAAATTGGTTAAGATCTGAAGGTGTACAAATTTCTATAAGCACTAGCAAAAATGGTATATCTGATATAGAAAGATTCCACAAGACCGTAAACGAAAAGCTAAGAATCATTGGTAGCCAACAAAATGTTGAAGATAGGTGCACAAAATTCGAAAGAATTCTATACATATACAATCACAAAACTAAACATAATAGTACTAAAAGATTTCCAGCAGACATTTTCCTATATGCAGGCAGTCCAGATTTTAATGTACAACAAAACAAAATCGATAGGATAGAATACCTCAATAAGAATAGACACGATTTTGAAGTTGATATAAAATATAGACAAGCCCCACTTGTAAAAAGTAAAATAACCAATCCATTTAAAAAGACAGGAAGAATTGGACAAGTAGATGATAAACATTTCGAAGAACAAAATCGTGGCAGGAAGATCGTTCACTATAAGTCAAAATTTAAGAAACAGAAAAAGTTTAATAAGAGCAAATATGATAATTCCAGACCAACCAAAGAAGCACAAAGTACACAACATACTTCTAATAATGCTTAGTTGCATACTATCACTTATCATCACGGTCAAGTGCAACAATATAGAAGTAAATCCAGTAAACGCGAAAAATGGATACCTTATATTCCAAACAGGAACAATGGAAATTCCAACCAGCTATGAATACCATTATTTAAGCATAAACATAACAAAGACAATGCTCATGTTCGAAGATATAGTAAGTGAAGCAAACAACTATCCTAATGTACCACAAATACAATATTTAGTCGACAAATTAAAACGAGAAATAAATGGGTTAAGAATTATTAGTCGAAGTAAAAGAGGTCTTTTAAACGTAGTAGGAAAAGCATACAAATACTTATTCGGCACATTAGATGAGGATGACAGAGAAGAGTTAGAAGAAAAAATAAACAACATGTCAGAAGACTCTGTAAAAACCCATGACCTAAACACGATTCTAGATGTAATCAATAGTGGTATAGATATAATTAATAAGCTCAAAGTAGATAAAGAACAACACCAACAAATTGCGGTACTAATATTTAACCTAGAGCAATTTACAGAATATATAGAAGACATAGAATTGGGTCTGCAATTAACCAGACTAGGAATTTTCAATCCAAGATTACTAAAGCATGA
CTATTTAAAACATGTAAATTCAGAAAAAATGCTAAAGATAAAAACGTCAACCTGGCTTAAAACAGACACGAACGAAATTTTGATTATTTCCCATATTCCTAGCGAAGTTACTAAAGTTCCAATATTCCAAATTGTTCCGTACCCAGATGAACATAATTATATTCTAACCGAGCAAATATTCGATAAATTCTACATATTTGATAACCAAGTATTCCATAAAGATACCAATAGGGATATATTCGACAAATGTATTATTGGAATCATCAAACAAGAGCAAACTCAATGCAAATATATTAAAACACATAAAAATTACCAAATAAATTATATAGAACCAAATATACTATTAACATGGAATATTCCTGAAACAGCTGTTAACCAAGACTGTACACACAATAAAATATTAATTTCAGGAAACAACATCATTAAAATTAAAAATTGTACCATACAAATAGATGAATTCTTAATCTCTAATAATCTAGCAGACTTTACACAAACAATTTATATCACCAACAATGTAACACGTCTAGAACCAATAAATCACTTACAAACGAGAGAAATGATAGAAACCCATGTAAAACACTATAACTTTTTTCAAATTATATGCATTACAACGTTCGTCATAATGATAATTAGTTTGACTCTGTATGTAGCATATAAGTTTAAAAATATACCTAAGAAAATTATTGTCAATATCGTAAGCAAAAAGAACACACGCACCTTGAAAATAATGTCAATGAAAATATTCAACAAGGAAATAATATTACCTTATACCCAAATTTAACGACCTGAGGACAGGCCAAATTCAAAGGTTGGGGGAGTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAACACATACACCCCAATACAACATACACTACTCCGGATGTACCCAACAGATACCAGATAAGAATAAGATTGTTATATGATCCTCGAGAATGGAAAAAACCCCAATTCTAGATAAGTCACCCACTGGTAGACTAAACATCCGTTCCCCTAATTTAAACAATTCCTTGCTTAAGCCTCACCCCATCGTCACATTCCCACGTTCAAAGCTCGGAGCCGCAATCCCGAAAAACAAAAGTATCGATTTCAATAAACAAATTATAAGAATCTAAGAGCACTTGTATCCAAGAGCAAATGCACTTGAATCCAAGAGAAACGCAAAGCTTTTTCTCTTTACGATCAGAATCCTAAAGTCTAAAGTCCATATTAGAAAAGCTCGATACCGAGGCTTGAACGTCAACCAAATCAGAATAATTATCAGAGTTCAGTTTGAGACCTAATTGTAAAAGGTTCGGTGTTCTTCTCAAATAAAAAGATTGTAATCATTTAGTGAAATAAAAATTATATTTTTTTCACTTATAAATATTGCAAGTATTTAATT
+>DMIS176
+AGTGACATATTCACATACAAAACCACATAACATAGAGTAAACATATTGAAAAGCCGCATACGTAAACAATAAGTGACCACCATGCTAATGTGGATCAAATAACAAAAATATCCACTCTGCATTTTGACACCCCCATACTGTATGCCATCTGCGCAGTATGCATTCTAATAAACAAATTCTTTGACAGCGGCACTTAGCCATTCTTGTAAACAAATCTTAAAGTCTGCCTGCTCTCTCTGAGGCTTCTCCTCCACTTAAGAATCCAAGAGCAATGCTCTCCCAAAAACACTAACATATTCTTTAAGCAAGCACAGAGGCTTCTCCTCATTTTCACTTTCATTTGATTTTCAGTCTTAAGCTGAACGTTAATCAATAAACAACACAATCGATACCGAAATTTTGATTCGTTTTATTTTGGCAAAACTCAATTTTCAGCGTTGGTCTTAGTTCATATTCGGAACGGTCCATTTAATAGACTCAAAACTATTTATTGCAACCATTTATTTGCAATTGGCGCAGTCGATGTGATCAGTGTTAAAGTTCCTTGATGCGGTAACCAGATTTGCCAATTCCTGTGTTCTTTTTGTTCTCTGACAAAAGTACCACGATAACGGGCACCCACGTGACGGTTAATATCGCTTTAAGTTTTTAATTAAACCTCGACAATAAAGTGAAACCGAAAAATCACAATTTGCCTAAACAAACCTGAATTTATTATCAGGAAGACGCTATTGAATTTGTGAGAGGCTGTAAATCCAATTGGTTACCTCAAAGACCCACGAAAAAGCTATAGTGCAACCCTTGCGAAAATCAAAACCTATCTTAAAAAAAAAAAAAAAATATAAATAATAAATTAATAAGCGAAAATTAAAACGTATTAAAAGTAAGAATAATAAATAAATAAGTGAAAATTCTATATGATAAAAATTAAAAATAAGAATAATAAATAAAAAGACAACATTTTAAATTAAACAATATTAAAAAAATATAAAAATATTAAAAACTATATTAAAAAAAAAAAAAAAACAAAAAAACAAAAAAAAAAAAATAAATAAATAATCCAAAAATCAAAAATGGCTCAAGAACCAGCAATTGTGCCACCACTATCAGACAGCAACATGACCCAGGTTGCCTACCAGATTGGCAATGTGGAGAAATTCAACGGTGATCCAGGCTCACTATACACCTTTGTGAGTCGAATTGATTACATACTGGCTCTTTATGCTACCGGAGATGAACGCCAACAGCAGATCATATTTGGGCATATTGAACGCAGCATCAGCGGAGAAGTTATGCGCTGCATTGGAGCCTATGACATGTACACCTGGCAGCAGCTTAGAAGACAATTGGTACTCAACTATAAACCCCAGACCCCTAACCACGTTCTTTTAGAAGAGTTTCGAAAGACCCCATTTCGAGGCAATGTACGAGCATTCCTGGAAGAAGCAGAAAGCCGCAGACAAACACTTACTAGTAAGCTTGAATTAGAGCAAGATCTTGAAGAAAAGACTTTTTATTTGAAATTAATAAAATCCAGTATAGAATCACTAATTGAAAAATTACCTACACACATTTATTTAAGAATAAATAACCACAACATACCAGATTTGCGATCACTTATAAACCTTTTACAAGAGAAGGGCATGTACGAACAAATAAATCATACAAGTACACATGTCCAAAAACAAAATTTCTCTGATAAGCCACAAAAGTCCTTTAATCAAAATACTAATCAGTCTAACAATATCAGAAAATATCCAACACCTTTCCTACATTATAATTCACCAATACCATATCAAGCTCCACAAATTTATCAAACACCACCAACTAATAACCCACTTTATCGTCATCCAATACCCTACCACCCTAATCCAAACAATGTTTTTCAACCAAGCCAACAAAACAATGTTTTCCAACCAAGCCAACAAAACAATGCTTTTCAACCAAATCAACGAACAAACTTTACATCTCGACCAATTTTTAACACCAATCGAAACAA
TGCATTCGATCAGAATAGGTTCGGACAACAACCCCAATATCAAAATCAACAATCAACACAAAATTCAAGTTCCTATGTACCCAATCGACCAATAAAACGATTAAGACCAGCTAATAGTGGACAGACTGGGATGAGTGTTGACGAAACATTATATCAAGAGGACGCTTTTTATCAGCAGTGTGTTCCATATGACTATTTTTATTATCCAACTTACGACCATTCAGACTATTATCCAGAAAATCAATATCAAATTGACGAAAACAACCAAAATTTACAAAGAACACAACAGTTACAGCAGATTAATACAGACGAGACAAACAATGACAACCAAGAACCCAATGTTGAACAGGCCGAAAATTTTCAGCCACAAGCCTTGGAAAACCCCAATATATAACAATTAAATACAAAGAAAATAATTTGAAATGCCTTATTGATACCGGATCAACAGTTAACATGACATCTAAAAATATATTTGATTTACCAATCCAGAATACTAGTACTTTTATTCATACCAGCAATGGACCGCTCATTGTCAACAAAAGTATAATCATACCTTCAAAGATTTTGTTCCCAACAACAAATGAATTTTTATTGCACCCTTTCTCTGAGAATTACGATCTTTTATTAGGAAGAAAACTTTTAGCAGAAGCAAAAGCAACAATAAGTTACCGCGATCAAGAGGTAACTCTTTACAACAACAAATACAAATTAATAGAAGGAATAGCAACACATGAACAGAGTCATTTTCAAAATGTAAATATGATACCTGACACCATGCTCAGACAGCCAAATAAAATTTCACCCATTTTAGAATCAGACCTATACAGATTGGAACATTTAAATAACGAAGAAAAACAAAGATTGTGCGCACTCCTGCAGAAATACCATGACATACAGTACCATGAAGGTGATAAGTTGACATTTACTAATCAAACCAAACATACTATCAATACAAAGCACAATCTACCACTTTACTCTAAATACAGTTACCCACAGGCTTATGAACAGGAGGTCGAAAGCCAAATACAAGATATGCTAAATCAAGGTATTATACGTACCAGTAATTCACCTTACAATAGCCCCATCTGGGTGGTTCCAAAGAAACAAGATGCATCAGGCAAACAGAAATTTAGAATTGTAATAGACTACCGAAAATTAAATGAAATAACAGTAGGAGACAGACACCCAATCCCAAACATGGACGAAATCTTGGGAAAATTGGGCAGATGTAATTACTTCACAACTATAGACTTGGCAAAGGGTTTCCACCAGATCGAAATGGATCCAGAATCAGTTTCAAAGACAGCCTTTTCTACCAAGCACGGTCATTATGAATATTTGCGCATGCCATTCGGATTAAAAAACGCGCCAGCCACCTTTCAACGGTGCATGAATGATATTTTAAGACCACTCTTAAACAAACACTGTCTTGTGTATTTGGACGACATAATTGTATTCTCGACATCCCTTGATGAACACCTGCAATCGCTCGGACTAGTTTTCGAAAAATTAGCAAAAGCCAACCTTAAATTACAACTTGACAAATGTGAGTTTCTCAAGCAAGAAACCACATTTTTAGGACATGTTCTAACACCAGATGGAATAAAACCAAACCCTGAAAAAATTGAAGCCATTCAAAAATATCCAATTCCCACTAAACCAAAAGAAATAAAAGCTTTTCTTGGACTGACAGGATATTATCGTAAATTTATTCCAAACTTTGCAGACATAGCCAAACCCATGACTAAGTGTTTAAAAAAGAACATGAAAATTGACACTACCAACCCAGAATATGACTCTGCATTTAAAAAATTAAAATATCTAATATCAGAAGACCCAATTCTTAAAGTACCCGACTTTACAAAGAAATTCACTTTAACCACAGACGCAAGTGATGTCGCTTTGGGGGCAGTACTGTCACAAGATGGACACCCACTTAGCTACATTAGCCGAACACTTAATGAACACGAAATAAATTACAGCACAATTGAAAAAGAACT
CTTAGCAATTGTATGGGCGACAAAGACTTTTCGACACTACCTACTTGGAAGACACTTTGAAATATCCAGTGACCATCAACCATTGAGCTGGTTGTACCGTATGAAAGACCCAAATTCAAAACTGACCCGATGGAGAGTAAAATTATCCGAATTCGATTTTGATATAAAATATATAAAAGGAAAAGAAAATTGCGTGGCGGATGCTCTGTCCAGAATAAAACTTGAGGAGACATATTTGAGCGAACAAACCCAACATAGTGCAGAAGAGGACAATAGTGATTTAATTTTTATTACAGAAAGACCTCTAAATACATTTAACAGACAAGTTATATTTTCAAAAGGACCACCAGACATTAAAGTTACGAAATATTTCAAAAAACACATCACCCAAATATTTTACGACATTATGACCAGGGAAAAAGCCGAACAATATTTGATAGACCATTTTTGTGGTAAGAAAAGTGCGTTGTATATTGAGAGTGACGCTGATTTCGAAGTCATTCAAGCCGCACATAAATTAGCCATAAACACCAAATATACAAAAATCCTGCGTAGCACGATTTTGTTAAAAAACATAACCACTTATGCGGAATTTAAGGAATTGATCTTGACTGCTCATGAAAAACTTCTACACCCAGGCATACAGAAAACTACTAAACTTTTCGGAGAAACTTACTATTTCCCTAATAGCCAGCTACTTATTCAGAATATAATAAATGAGTGCAGTATTTGCAATCTGGCAAAAACAGAGCACCGAAATACAGACATGCCAACGAAAACCACACCCAAACCAGAACATTGCCGCGAAAAATTCATGATAGACATTTACTCATCCGAAGGCAAACATTACGTTAGTTGCATAGACATTTATTCGAAATTTGCCACATTAGAAGAAATAAAAACAAAAGACTGGATAGAATGCAAAAACGCGCTTATGCGCATATTCAACCAGCTTGGCAAGCCAAAGTTACTAAAGGCGGACAGAGACGGCGCATTTTCCAGTTTAGCCCTCAAGAGATGGCTGGAGAGTGAGGAAGTCGAATTGCAGCTTAACACAACAAAAACTGGTGTGGCGGACATAGAAAGACTACATAAAACAATTAATGAAAAGATTCGCATAATCAAAACATCCGATGACGAAGAAACCAAATTGAGCAAAATGGAAACAGTACTTAACATATACAATCATAAAACCAAACACGACACCACTGGACAGACCCCTGCACACATATTTCTCTACGCTGGACAACCAATATTAGATACCCAACAAAACAAAGAAAACAAAATAAACAAAATAAATAATGACAGAGTGGAGTACGAAGTCGACACAAGATACAGAAAAGGTCCACTACAGAAAGGCAAATTAGAAAATCCTTTTAAGCCAACAAAAAATGTGGAGCAGACTGACTCTGATCATTATAAAATTACTAATAGAAATAGAATTACTCACTACTACAAAACACAATTCAAAAAACGAAAGAAAAATAATCAGCTCTCAATTTCACAGGCACCTGGCACTTGATAACATTGCTGCTGATGCTGATCACAACAGTTCATGGACAACAAATTGAAATTAATAATATTGACACAAACCACGGATATCTCCTTTTTTCTGATAAACCAGTCCAGATACCATCATCCTTTGAACATCATTGCTTGAGAATCAATTTAACTGAAATAGACACCATAGCTGATTATTTTGAGCAAAGACTACGTACCGACTACCATGCACCCCAGGTCAAATTTTTATACAACAAAATGAGAAGAGAACTAGCTGGAATAGCCTTGCGACATAGAAATAAACGGGGACTTATTAACATTGTAGGTTCAGTTTTTAAATACCTATTTGGCACACTTGACGAAAATGATCGAGTGGATATACAGAGGAAACTTGAAACAAACGCCCATAACTCGGTAAATTTACATGAACTCAATGACGCTATTCAATTAATAAATGACGGAATGCAAAAGATACAGAATTATGAAAACAACAGCAACAT
CATTAACAGTCTTTTATATGAACTCATGCAGTTTACAGAATACATAGAAGATGTGGAAATGGGAATGCAGCTTTCCAGACTCGGTCTATTTAATCCCAAACTACTAAACTACGATAAACTTGAGAATGTAAACAGCCAAAATATTTTAAACATTAAAACATCCACTTGGATTAATTACAATGATAACCAATTATTAATCATATCTCACATACCTATTAACTTTTCATTAATAAATACAGTAAAAATAATCCCTTACCCAGACTCGAACGGCTATCAGCTAGAATACACAGACACACAATCATATTTTGAAAGAGAAAATAAAGTTTACAATAACGAAAATAAAGAAATAAACAATGAGTGTGTCACCAACATTATTAAACATTTAAAACCAATTTGTAATTTTGAGTCAATCCACACAGATGAAATAATAAAATACATAGAACCAAACACAATTGTAACCTGGAATTTAACCCAAACAAGTCTCAAACAAAATTGTCAAAATTCATTTAATAATATAAAAATAAAAGGAAACAAAATGATAAAAGTAACCCAATGTAAAATAGAAATCAATAGCATAATTCTAAGTGAAAATCTCTTTAAACCAGAAATAGATTTGACACCATTATACACACCACTTAACATAACAAAAATAAAAACTGTTAAACACAACGACATTAATGAAATGATTTCACAAAACAATATTACACTTTACATATTTATGACTACTGTCATCATTATACTTATTTTATTGTACTTATATTTAAGATACGTATCATTTAACCCATTCATGATGCTGTATGCAAAACTAAAATTAAGAAAAAATCAAAATCAAAACACAGCACAACAAATAGAAATGGAAGACGTTCCATTACCCCTACTATATCCATCAATCCCAGCCCAAGTATAGGCTTCTCTTTAAGGGAAGGGAAGTGACATATTCACATACAAAACCACATAACGTAGAGTAAACATATTGAAAAGCCGCATACGTCAACAATAAGTGACCACCATGCTAATGTGGATCAAATAACAAAAATATCCACTCTGCATTTTGACACCCCCATACTGTATGCCATCTGCGCAGTATGCATTCTAATAAACAAATTCTTTGACAGCGGCACTTAGCCATTCTTGTAAACAAATCTTAAAGTCTGCCTGCTCTCTCTGAGGCTTCTCCTCCACTTAAGAATCCAAGAGCAATGCTCTCCCAAAAACACTAACATATTCTTTAAGCAAGCACAGAGGCTTCTCCTCATTTTCACTTTCATTTGATTTTCAGTCTTAAGCTGAACGTTAATCAATAAACAACACAATCGATACCGAAATTTTGATTCGTTTTATTTTGGCAAAACTCAATTTTCAGCGTTGGTCTTAGTTCATATTCGGAACGGTCCATTTAATAGACTCAAAACTATTTATTGCAACCATTTATTTGCAATT