comparison test-data/genbank_input.txt @ 0:bcdd1a35e545 draft default tip

planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author portiahollyoak
date Fri, 22 Apr 2016 12:09:14 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bcdd1a35e545
1 ID DME9736 standard; DNA; INV; 7411 BP.
2 XX
3 AC AJ009736;
4 XX
5 DR FLYBASE; FBgn0026065; Idefix.
6 XX
7 FT source AJ009736:1..7411
8 FT SO_feature five_prime_LTR ; SO:0000425:1..600
9 FT SO_feature three_prime_LTR ; SO:0000426:6841..7411
10 FT SO_feature CDS ; SO:0000316:<988..2031
11 FT /name="Idefix\gag"
12 FT /db_xref="FLYBASE:FBgn0027381"
13 FT /db_xref="SPTREMBL:O96739"
14 FT /db_xref="NCBI_PROTEIN:CAA08806.1"
15 FT /translation="ARKLKDIMAVPQLSETHLNQLLNQIKELNYYDGAPGKLSGFVNQV
16 FT EQLLSLYPTQEARQAHVIYGAVKRLLVDSALEVVTQERANTWLDMKKALAMAFKDHRPY
17 FT VTLIRQLEDISYPGSICKFIEKLETQYWIMFDKLELESDHVDKSNYTEMLNKTVKSVID
18 FT RKLPDRIYMSLARKDIDTIYKLKQASMELGLYDAIPENHRSNRTEMNKRRNRGNYNQNN
19 FT NQKYYNNRNHNYSNYYPSMNQNHNTQPPQNPTQPMTNQNQYSPRFIPNNQRGNYYAFRR
20 FT DLTQAQQNNPLNNTLNFQPSTSNNINRQGPVKRQRESQSDQSRMDVNFHQAASDTQMIE
21 FT KDIQVPM"
22 FT SO_feature CDS ; SO:0000316:<1950..5402
23 FT /name="Idefix\pol"
24 FT /db_xref="FLYBASE:FBgn0027380"
25 FT /db_xref="SPTREMBL:O96740"
26 FT /db_xref="NCBI_PROTEIN:CAA08807.1"
27 FT /translation="PKQDGCKFSSSCLGHSNDREGHTSPYVKIIHHNKNYKGMIDTGSS
28 FT INIIRENFENLEEKEENLIVYTIKGPITLKRSIIIKPTSVCPSAQKFYIHKFSDNYDFL
29 FT LGRKYLEDTKAKIDYANETVTLGSKVFKFLYEEKKGETASKCLDPQEKNDSALVDRTKP
30 FT KMQKVKTAPKCLKPKHQQQKKETALPKCLISNVVKDTVDNDVTHLDPMSVDNDIVNFAI
31 FT NNELRECNEYRLEHLNAEEVECLKKFLYEYRDIQYKEGENLTFTSTIKHVIQTQHEDPV
32 FT YRKPYKYPQSVDQEVNKQIKEMIEQGIVRKSKSPYCSPIWVVPKKADASGKQKFRLVVD
33 FT YRNLNEITVNDKFPIPRMDEILDKLGRCQYFTTIDLAKGFHQIQMDENSIAKTAFSTKH
34 FT GHYEYTRMPFGLKNAPATFQRCMNNLLEDLIYKDCLVYLDDIIVYSTPLEEHILSLKKV
35 FT FEKLRDANLKLQLDKCEFMKKETEFLGHIVTTNGIKPNPNKTKAITNFPLPKTPKQIKS
36 FT FLGLCGFYRKFIPNFAKIVKPMTLKLKKGAIIDTKCKEYIESFEKLKVLITSDPILIYP
37 FT DFSKPFSLTTDASNVAIGAVLSQNHKPVCYASRTLNEHEINYATIEKELLAIVWATKYF
38 FT RSYLFGRPFEVLSDHKPLVWLNNIKEPNMKLQRWKIKLNEFDYKIKYLPGKENHVADAL
39 FT SRTKIEVMVGEVANSADATIHSAIEDNLNYIPITERPINYFSRQIEIEKGDNDTTSVQH
40 FT LFQKLKIKIVYKEMTPELAKNLIKEYVCTKKSAIYFPNDEDFLIFQRAFTEIISPNNFT
41 FT KLLRCTTKLIDILTYAEFKDLILKKHKELLHPGIEKTINLFKEEYYYPDSQKLIQTIIN
42 FT ECQICYLAKTEHQTQMTYETTPEIFNTREKYMIDFYLTGNQIFLSCIDIYSKFASLVEL
43 FT KSRDWLEAKRAITKIFNDMGKPQEIKADKDSAFMCLALQNWLRSEGVQISISTSKNGIS
44 FT DIERFHKTVNEKLRIIGSQQNVEDRCTKFERILYIYNHKTKHNSTKRFPADIFLYAGSP
45 FT DFNVQQNKIDRIEYLNKNRHDFEVDIKYRQAPLVKSKITNPFKKTGRIGQVDDKHFEET
46 FT NRGRKIVHYKSKFKKQKKFNKSKYDNSRPTKEAQSTQHTSNNA"
47 FT SO_feature CDS ; SO:0000316:5248..6780
48 FT /name="Idefix\env"
49 FT /db_xref="FLYBASE:FBgn0027382"
50 FT /db_xref="SPTREMBL:O96741"
51 FT /db_xref="NCBI_PROTEIN:CAA08808.1"
52 FT /translation="MINISKKQIVAGRSFTISQNLRNRKSLIRANMIIPDQPKKHKVHN
53 FT ILLIMLSCILSLIITVKCNNIEVNPVNAKNGYLIFQTGTMEIPTSYEYHYLSINITKTM
54 FT LMFEDIVSEANNYPNVPQIQYLVDKLKREINGLRIISRSKRGLLNVVGKAYKYLFGTLD
55 FT EDDREELEEKINNMSEDSVKTHDLNTILDVINSGIDIINKLKVDKEQHQQIAVLIFNLE
56 FT QFTEYIEDIELGLQLTRLGIFNPRLLKHDYLKHVNSEKMLKIKTSTWLKTDTNEILIIS
57 FT HIPSEVTKVPIFQIVPYPDEHNYILTEQIFDKFYIFDNQVFHKDTNRDIFDKCIIGIIK
58 FT QEQTQCKYIKTHKNYQINYIEPNILLTWNIPETAVNQDCTHNKILISGNNIIKIKNCTI
59 FT QIDEFLISNNLADFTQTIYITNNVTRLEPINHLQTREMIETHVKHYNFFQIICITTFVI
60 FT MIISLTLYVAYKFKNIPKKIIVNIVSKKNTRTLKIMSMKIFNKEIILPYTQI"
61 XX
62 CC Derived from AJ009736 (e1371475) (Rel. 58, Last updated, Version 1).
63 CC Takis Benos and Michael Ashburner, 1-Feb-1999.
64 CC Any changes to original sequence record are annotated in an FT line.
65 XX
66 SQ Sequence 7411 BP; 3047 A; 1363 C; 1109 G; 1892 T; 0 other;
67 GTGACATATC CATAAGTCCC TAAGACTTAA GCATATGCCT ACATACTAAT ACACTTACAA 60
68 CACATACACC CCAATACAAC ATACACTACT CCGGATGTAC CCAACAGATA CCAGATAAGA 120
69 ATAAGATTGT TATATGATCC TCGAGAATGG AAAAAACCCC AATTCTAGAT AAGTCACCCA 180
70 CTGGTAGACT AAACATCCGT CCCCTAATTT AAACAATTCC TTGCTTAAGC CTCACCCCAT 240
71 CGTCACATTC CCACGTTCAA AGCTCGGAGC CGCAATCCCG AAAAACAAAA GTATCGATTT 300
72 CAATAAACAA ATTATAAGAA TCTAAGAGCA CTTGTATCCA AGAGCAAATG CACTTGAATC 360
73 CAAGAGAAAC GCAAAGCTTT TTCTCTTTAC GATCAGAATC CTAAAGTCTA AAGTCCATAT 420
74 TAGAAAAGCT CGATACCGAG GCTTGAACGT CAACCAAATC AGAATAATTA TCAGAGTTCA 480
75 GTTTGAGACC TAATTGTAAA AGGTTCGGTG TTCTTCTCAA ATAAAAAGAT TGTAATCATT 540
76 TAGTGAAATA AAAATTATAT TTTTTTCACT TATAAATATT GCAAGTATTT AATTGGCGCA 600
77 GTCGGTTAGG ATCCAATAAA ATAAAAGAGT CCTTTTAGTA CGGTACTGAT CAACTGAAGG 660
78 ATATGCTATA CGACTAGCTA TCCAAGATCA GCGAATTAAA ATAGTGATTC AAAAATATTT 720
79 TTTAATCCGC AAAAGAATCT ACGTGAAAGT AGTATTCAAA ATAAAATCCC GTGCGGTCGG 780
80 AAACAAAAAT TAATTTAAAT TTTTTAATTC CGAAACTTAA AACCAAGTTT AAAGAAAACT 840
81 TAAAATCAAG AAAACTTAAA ACCAAGTTTA AAGAAAACTT AAAATCAAGA AAACTTAAAA 900
82 CCAAGTTTAA AGAAAACTTA AAATCAAGAA AACTTAAAAC CAAGTTTAAA GAAAACTCAA 960
83 AATCAAGAAA ACTTAAAGCC AAAATAAGCT AGAAAACTAA AAGACATCAT GGCAGTCCCA 1020
84 CAACTCTCAG AAACACACCT AAACCAACTG CTAAACCAAA TCAAAGAATT AAACTACTAC 1080
85 GATGGCGCAC CTGGCAAATT ATCTGGATTC GTCAACCAAG TGGAACAACT GCTCAGTTTA 1140
86 TACCCAACAC AGGAAGCAAG ACAGGCACAC GTCATATATG GAGCAGTGAA GCGGTTATTA 1200
87 GTGGATTCAG CCTTAGAAGT CGTAACCCAG GAAAGAGCTA ACACATGGCT GGACATGAAG 1260
88 AAAGCACTGG CAATGGCATT CAAAGACCAT AGACCTTATG TAACTCTCAT CAGACAATTA 1320
89 GAAGACATAT CATACCCAGG AAGTATCTGT AAGTTTATAG AAAAATTAGA AACACAATAC 1380
90 TGGATTATGT TCGATAAGTT AGAATTAGAA AGTGACCATG TTGATAAATC GAATTATACC 1440
91 GAAATGTTAA ACAAAACTGT TAAATCAGTA ATAGATCGAA AACTGCCGGA TAGAATTTAT 1500
92 ATGTCTTTGG CACGTAAAGA TATTGATACA ATTTATAAAT TAAAACAAGC ATCAATGGAA 1560
93 TTAGGCCTTT ATGATGCTAT TCCAGAAAAT CACCGTTCTA ATAGAACAGA AATGAATAAA 1620
94 CGTAGGAACA GGGGAAACTA TAATCAAAAT AATAATCAAA AATATTACAA TAATAGAAAT 1680
95 CACAACTACA GTAATTATTA TCCTAGCATG AATCAGAATC ATAATACACA ACCACCTCAG 1740
96 AATCCGACTC AACCTATGAC AAATCAAAAC CAATATTCAC CGCGTTTCAT ACCGAATAAT 1800
97 CAAAGAGGGA ATTATTATGC ATTTAGACGA GACTTAACAC AAGCTCAGCA GAACAACCCA 1860
98 CTTAATAACA CCCTTAACTT CCAACCTTCG ACATCGAATA ATATTAACAG ACAAGGGCCA 1920
99 GTAAAAAGAC AACGCGAGAG TCAGAGTGAC CAAAGCAGGA TGGATGTAAA TTTTCATCAA 1980
100 GCTGCCTCGG ACACTCAAAT GATAGAGAAG GACATACAAG TCCCTATGTA AAAATAATTC 2040
101 ATCATAATAA AAATTATAAG GGAATGATCG ATACAGGATC ATCAATTAAC ATCATAAGAG 2100
102 AAAATTTTGA GAACTTAGAA GAAAAGGAAG AAAACCTAAT AGTATACACT ATTAAAGGAC 2160
103 CAATAACACT AAAGAGAAGT ATAATAATAA AACCTACTTC AGTATGTCCG TCTGCTCAAA 2220
104 AATTCTACAT TCACAAATTT TCTGATAACT ATGATTTCTT GTTAGGTCGA AAGTATTTAG 2280
105 AAGATACAAA AGCTAAAATA GATTATGCTA ACGAAACAGT AACACTAGGC TCAAAAGTAT 2340
106 TTAAGTTTCT CTATGAAGAA AAGAAGGGCG AGACCGCATC CAAATGCCTT GACCCACAAG 2400
107 AAAAGAATGA TTCCGCTCTA GTGGACAGAA CCAAACCAAA AATGCAAAAG GTTAAGACCG 2460
108 CACCTAAGTG CCTTAAACCA AAGCATCAAC AGCAGAAGAA AGAGACCGCA TTACCCAAAT 2520
109 GCCTCATTTC AAATGTTGTT AAAGACACAG TGGACAATGA TGTAACACAT CTCGATCCCA 2580
110 TGTCCGTTGA CAACGATATA GTCAACTTCG CGATTAACAA TGAGTTACGC GAATGTAACG 2640
111 AGTATAGACT CGAACACTTA AATGCAGAGG AAGTTGAATG TTTAAAGAAG TTCCTATACG 2700
112 AATATAGAGA CATTCAGTAC AAAGAGGGCG AAAATTTGAC CTTCACCAGT ACTATTAAAC 2760
113 ATGTCATCCA GACTCAACAC GAAGACCCAG TATACCGTAA ACCCTACAAG TACCCTCAAA 2820
114 GCGTTGACCA AGAAGTTAAC AAACAAATTA AAGAAATGAT AGAACAAGGG ATTGTTCGCA 2880
115 AATCGAAGTC CCCTTATTGT TCTCCTATTT GGGTGGTCCC CAAGAAGGCA GACGCCTCTG 2940
116 GGAAACAAAA ATTCAGGTTG GTAGTCGATT ACAGGAACCT AAATGAGATA ACTGTTAACG 3000
117 ACAAATTTCC CATTCCCCGA ATGGATGAGA TATTGGACAA ACTAGGTAGA TGCCAATACT 3060
118 TTACCACTAT AGATCTAGCC AAGGGTTTTC ACCAAATCCA AATGGATGAA AATTCTATTG 3120
119 CAAAAACAGC TTTTTCAACT AAGCATGGGC ATTATGAATA TACTCGTATG CCCTTTGGTT 3180
120 TAAAAAACGC TCCAGCTACT TTTCAGAGAT GCATGAATAA TCTTCTGGAA GATTTAATCT 3240
121 ACAAAGACTG TTTAGTCTAT TTAGACGATA TTATTGTTTA TTCCACTCCA TTGGAAGAAC 3300
122 ACATTTTATC CCTAAAGAAA GTCTTTGAAA AACTGAGAGA CGCTAATTTA AAGTTGCAAC 3360
123 TAGATAAATG TGAATTCATG AAGAAAGAAA CTGAATTCCT AGGACACATC GTCACAACAA 3420
124 ATGGCATCAA ACCAAATCCA AATAAAACTA AAGCAATTAC AAATTTTCCA TTACCCAAGA 3480
125 CACCTAAGCA AATAAAATCA TTTTTGGGAT TATGTGGATT CTATCGCAAG TTTATTCCTA 3540
126 ACTTTGCCAA AATAGTTAAA CCCATGACCC TCAAATTAAA GAAAGGTGCT ATAATAGACA 3600
127 CCAAATGTAA AGAATACATC GAATCATTTG AAAAATTAAA AGTTTTGATA ACTTCAGACC 3660
128 CGATATTAAT CTATCCTGAT TTTTCAAAAC CTTTTTCTTT GACAACTGAT GCTAGCAACG 3720
129 TAGCTATTGG TGCAGTGTTA TCACAAAATC ACAAGCCAGT TTGTTATGCC AGTAGAACGC 3780
130 TAAACGAACA TGAAATCAAC TATGCTACGA TTGAAAAAGA ATTGTTAGCT ATAGTTTGGG 3840
131 CTACAAAATA TTTCAGGTCA TACTTATTCG GCAGACCATT TGAAGTATTA AGTGATCACA 3900
132 AGCCACTGGT ATGGCTCAAC AACATTAAAG AACCAAACAT GAAATTGCAA AGATGGAAAA 3960
133 TAAAACTTAA TGAATTCGAT TATAAAATCA AATATCTTCC AGGCAAAGAA AACCATGTCG 4020
134 CGGATGCTCT TTCCCGCACG AAAATAGAAG TTATGGTTGG CGAGGTCGCA AATAGCGCAG 4080
135 ACGCAACTAT ACACAGTGCC ATTGAAGATA ATCTAAATTA CATACCCATA ACAGAAAGAC 4140
136 CAATAAATTA CTTCTCTAGA CAAATAGAGA TAGAAAAAGG CGATAACGAT ACAACAAGTG 4200
137 TACAACATTT GTTTCAAAAA TTAAAGATTA AGATAGTCTA TAAAGAAATG ACACCTGAAC 4260
138 TCGCCAAAAA CCTCATTAAG GAATATGTGT GCACCAAAAA GAGTGCAATT TATTTCCCTA 4320
139 ATGACGAAGA TTTTCTGATC TTCCAGAGAG CGTTTACCGA AATTATAAGC CCTAACAATT 4380
140 TCACAAAACT CTTGAGATGT ACCACAAAGT TAATTGATAT ACTAACGTAT GCAGAATTCA 4440
141 AAGATTTAAT CTTAAAGAAA CATAAGGAAC TTTTACATCC GGGTATAGAA AAAACAATCA 4500
142 ATTTATTTAA AGAAGAATAT TACTATCCTG ATAGTCAAAA GCTTATTCAA ACCATTATCA 4560
143 ATGAATGTCA AATTTGTTAT CTAGCAAAAA CGGAACATCA AACACAAATG ACATATGAGA 4620
144 CTACACCAGA AATATTTAAC ACAAGAGAAA AATACATGAT AGATTTTTAT CTCACAGGAA 4680
145 ACCAGATCTT CTTATCTTGC ATTGATATCT ATTCGAAATT TGCATCACTA GTTGAATTAA 4740
146 AAAGTAGAGA TTGGCTAGAA GCAAAAAGAG CCATTACTAA AATATTCAAT GACATGGGAA 4800
147 AACCGCAAGA AATTAAAGCA GACAAAGACT CAGCTTTTAT GTGTTTAGCC TTACAAAATT 4860
148 GGTTAAGATC TGAAGGTGTA CAAATTTCTA TAAGCACTAG CAAAAATGGT ATATCTGATA 4920
149 TAGAAAGATT CCACAAGACC GTAAACGAAA AGCTAAGAAT CATTGGTAGC CAACAAAATG 4980
150 TTGAAGATAG GTGCACAAAA TTCGAAAGAA TTCTATACAT ATACAATCAC AAAACTAAAC 5040
151 ATAATAGTAC TAAAAGATTT CCAGCAGACA TTTTCCTATA TGCAGGCAGT CCAGATTTTA 5100
152 ATGTACAACA AAACAAAATC GATAGGATAG AATACCTCAA TAAGAATAGA CACGATTTTG 5160
153 AAGTTGATAT AAAATATAGA CAAGCCCCAC TTGTAAAAAG TAAAATAACC AATCCATTTA 5220
154 AAAAGACAGG AAGAATTGGA CAAGTAGATG ATAAACATTT CGAAGAACAA AATCGTGGCA 5280
155 GGAAGATCGT TCACTATAAG TCAAAATTTA AGAAACAGAA AAAGTTTAAT AAGAGCAAAT 5340
156 ATGATAATTC CAGACCAACC AAAGAAGCAC AAAGTACACA ACATACTTCT AATAATGCTT 5400
157 AGTTGCATAC TATCACTTAT CATCACGGTC AAGTGCAACA ATATAGAAGT AAATCCAGTA 5460
158 AACGCGAAAA ATGGATACCT TATATTCCAA ACAGGAACAA TGGAAATTCC AACCAGCTAT 5520
159 GAATACCATT ATTTAAGCAT AAACATAACA AAGACAATGC TCATGTTCGA AGATATAGTA 5580
160 AGTGAAGCAA ACAACTATCC TAATGTACCA CAAATACAAT ATTTAGTCGA CAAATTAAAA 5640
161 CGAGAAATAA ATGGGTTAAG AATTATTAGT CGAAGTAAAA GAGGTCTTTT AAACGTAGTA 5700
162 GGAAAAGCAT ACAAATACTT ATTCGGCACA TTAGATGAGG ATGACAGAGA AGAGTTAGAA 5760
163 GAAAAAATAA ACAACATGTC AGAAGACTCT GTAAAAACCC ATGACCTAAA CACGATTCTA 5820
164 GATGTAATCA ATAGTGGTAT AGATATAATT AATAAGCTCA AAGTAGATAA AGAACAACAC 5880
165 CAACAAATTG CGGTACTAAT ATTTAACCTA GAGCAATTTA CAGAATATAT AGAAGACATA 5940
166 GAATTGGGTC TGCAATTAAC CAGACTAGGA ATTTTCAATC CAAGATTACT AAAGCATGAC 6000
167 TATTTAAAAC ATGTAAATTC AGAAAAAATG CTAAAGATAA AAACGTCAAC CTGGCTTAAA 6060
168 ACAGACACGA ACGAAATTTT GATTATTTCC CATATTCCTA GCGAAGTTAC TAAAGTTCCA 6120
169 ATATTCCAAA TTGTTCCGTA CCCAGATGAA CATAATTATA TTCTAACCGA GCAAATATTC 6180
170 GATAAATTCT ACATATTTGA TAACCAAGTA TTCCATAAAG ATACCAATAG GGATATATTC 6240
171 GACAAATGTA TTATTGGAAT CATCAAACAA GAGCAAACTC AATGCAAATA TATTAAAACA 6300
172 CATAAAAATT ACCAAATAAA TTATATAGAA CCAAATATAC TATTAACATG GAATATTCCT 6360
173 GAAACAGCTG TTAACCAAGA CTGTACACAC AATAAAATAT TAATTTCAGG AAACAACATC 6420
174 ATTAAAATTA AAAATTGTAC CATACAAATA GATGAATTCT TAATCTCTAA TAATCTAGCA 6480
175 GACTTTACAC AAACAATTTA TATCACCAAC AATGTAACAC GTCTAGAACC AATAAATCAC 6540
176 TTACAAACGA GAGAAATGAT AGAAACCCAT GTAAAACACT ATAACTTTTT TCAAATTATA 6600
177 TGCATTACAA CGTTCGTCAT AATGATAATT AGTTTGACTC TGTATGTAGC ATATAAGTTT 6660
178 AAAAATATAC CTAAGAAAAT TATTGTCAAT ATCGTAAGCA AAAAGAACAC ACGCACCTTG 6720
179 AAAATAATGT CAATGAAAAT ATTCAACAAG GAAATAATAT TACCTTATAC CCAAATTTAA 6780
180 CGACCTGAGG ACAGGCCAAA TTCAAAGGTT GGGGGAGTGA CATATCCATA AGTCCCTAAG 6840
181 ACTTAAGCAT ATGCCTACAT ACTAATACAC TTACAACACA TACACCCCAA TACAACATAC 6900
182 ACTACTCCGG ATGTACCCAA CAGATACCAG ATAAGAATAA GATTGTTATA TGATCCTCGA 6960
183 GAATGGAAAA AACCCCAATT CTAGATAAGT CACCCACTGG TAGACTAAAC ATCCGTTCCC 7020
184 CTAATTTAAA CAATTCCTTG CTTAAGCCTC ACCCCATCGT CACATTCCCA CGTTCAAAGC 7080
185 TCGGAGCCGC AATCCCGAAA AACAAAAGTA TCGATTTCAA TAAACAAATT ATAAGAATCT 7140
186 AAGAGCACTT GTATCCAAGA GCAAATGCAC TTGAATCCAA GAGAAACGCA AAGCTTTTTC 7200
187 TCTTTACGAT CAGAATCCTA AAGTCTAAAG TCCATATTAG AAAAGCTCGA TACCGAGGCT 7260
188 TGAACGTCAA CCAAATCAGA ATAATTATCA GAGTTCAGTT TGAGACCTAA TTGTAAAAGG 7320
189 TTCGGTGTTC TTCTCAAATA AAAAGATTGT AATCATTTAG TGAAATAAAA ATTATATTTT 7380
190 TTTCACTTAT AAATATTGCA AGTATTTAAT T 7411
191 //
192 ID DMIS176 standard; DNA; INV; 7439 BP.
193 XX
194 AC X01472; J01060; J01061;
195 XX
196 DR FLYBASE; FBgn0000004; 17.6.
197 XX
198 FT source X01472:1..7439
199 FT SO_feature five_prime_LTR ; SO:0000425:1..512
200 FT SO_feature three_prime_LTR ; SO:0000426:6928..7439
201 FT SO_feature TATA_box ; SO:0000174:372..377
202 FT SO_feature TATA_box ; SO:0000174:7271..7277
203 FT SO_feature primer_binding_site ; SO:0005850:511..529
204 FT SO_feature polyA_signal_sequence ; SO:0000551:372..377
205 FT SO_feature polyA_signal_sequence ; SO:0000551:7299.7304
206 FT SO_feature RR_tract ; SO:0000435:6917..6927
207 FT SO_feature CDS ; SO:0000316:1074..2393
208 FT /name="17.6\gag"
209 FT /db_xref="FLYBASE:FBgn0044339"
210 FT /db_xref="SWISS-PROT:P04282"
211 FT /db_xref="NCBI_PROTEIN:CAA25701.1"
212 FT /translation="MAQEPAIVPPLSDSNMTQVAYQIGNVEKFNGDPGSLYTFVSRIDY
213 FT ILALYATGDERQQQIIFGHIERSISGEVMRCIGAYDMYTWQQLRRQLVLNYKPQTPNHV
214 FT LLEEFRKTPFRGNVRAFLEEAESRRQTLTSKLELEQDLEEKTFYLKLIKSSIESLIEKL
215 FT PTHIYLRINNHNIPDLRSLINLLQEKGMYEQINHTSTHVQKQNFSDKPQKSFNQNTNQS
216 FT NNIRKYPTPFLHYNSPIPYQAPQIYQTPPTNNPLYRHPIPYHPNPNNVFQPSQQNNVFQ
217 FT PSQQNNAFQPNQRTNFTSRPIFNTNRNNAFDQNRFGQQPQYQNQQSTQNSSSYVPNRPI
218 FT KRLRPANSGQTGMSVDETLYQEDAFYQQCVPYDYFYYPTYDHSDYYPENQYQIDENNQN
219 FT LQRTQQLQQINTDETNNDNQEPNVEQAENFQPQALENPNI"
220 FT SO_feature CDS ; SO:0000316:2345..5518
221 FT /name="17.6\pol"
222 FT /db_xref="FLYBASE:FBgn0014453"
223 FT /db_xref="SWISS-PROT:P04323"
224 FT /db_xref="NCBI_PROTEIN:CAA25702.1"
225 FT /translation="TGRKFSATSLGKPQYITIKYKENNLKCLIDTGSTVNMTSKNIFDL
226 FT PIQNTSTFIHTSNGPLIVNKSIIIPSKILFPTTNEFLLHPFSENYDLLLGRKLLAEAKA
227 FT TISYRDQEVTLYNNKYKLIEGIATHEQSHFQNVNMIPDTMLRQPNKISPILESDLYRLE
228 FT HLNNEEKQRLCALLQKYHDIQYHEGDKLTFTNQTKHTINTKHNLPLYSKYSYPQAYEQE
229 FT VESQIQDMLNQGIIRTSNSPYNSPIWVVPKKQDASGKQKFRIVIDYRKLNEITVGDRHP
230 FT IPNMDEILGKLGRCNYFTTIDLAKGFHQIEMDPESVSKTAFSTKHGHYEYLRMPFGLKN
231 FT APATFQRCMNDILRPLLNKHCLVYLDDIIVFSTSLDEHLQSLGLVFEKLAKANLKLQLD
232 FT KCEFLKQETTFLGHVLTPDGIKPNPEKIEAIQKYPIPTKPKEIKAFLGLTGYYRKFIPN
233 FT FADIAKPMTKCLKKNMKIDTTNPEYDSAFKKLKYLISEDPILKVPDFTKKFTLTTDASD
234 FT VALGAVLSQDGHPLSYISRTLNEHEINYSTIEKELLAIVWATKTFRHYLLGRHFEISSD
235 FT HQPLSWLYRMKDPNSKLTRWRVKLSEFDFDIKYIKGKENCVADALSRIKLEETYLSEQT
236 FT QHSAEEDNSDLIFITERPLNTFNRQVIFSKGPPDIKVTKYFKKHITQIFYDIMTREKAE
237 FT QYLIDHFCGKKSALYIESDADFEVIQAAHKLAINTKYTKILRSTILLKNITTYAEFKEL
238 FT ILTAHEKLLHPGIQKTTKLFGETYYFPNSQLLIQNIINECSICNLAKTEHRNTDMPTKT
239 FT TPKPEHCREKFMIDIYSSEGKHYVSCIDIYSKFATLEEIKTKDWIECKNALMRIFNQLG
240 FT KPKLLKADRDGAFSSLALKRWLESEEVELQLNTTKTGVADIERLHKTINEKIRIIKTSD
241 FT DEETKLSKMETVLNIYNHKTKHDTTGQTPAHIFLYAGQPILDTQQNKENKINKINNDRV
242 FT EYEVDTRYRKGPLQKGKLENPFKPTKNVEQTDSDHYKITNRNRITHYYKTQFKKRKKNN
243 FT QLSISQAPGT"
244 FT SO_feature CDS ; SO:0000316:5488..6903
245 FT /name="17.6\env"
246 FT /db_xref="FLYBASE:FBgn0027624"
247 FT /db_xref="SWISS-PROT:P04283"
248 FT /db_xref="NCBI_PROTEIN:CAA25703.1"
249 FT /translation="SALNFTGTWHLITLLLMLITTVHGQQIEINNIDTNHGYLLFSDKP
250 FT VQIPSSFEHHCLRINLTEIDTIADYFEQRLRTDYHAPQVKFLYNKMRRELAGIALRHRN
251 FT KRGLINIVGSVFKYLFGTLDENDRVDIQRKLETNAHNSVNLHELNDAIQLINDGMQKIQ
252 FT NYENNSNIINSLLYELMQFTEYIEDVEMGMQLSRLGLFNPKLLNYDKLENVNSQNILNI
253 FT KTSTWINYNDNQLLIISHIPINFSLINTVKIIPYPDSNGYQLEYTDTQSYFERENKVYN
254 FT NENKEINNECVTNIIKHLKPICNFESIHTDEIIKYIEPNTIVTWNLTQTSLKQNCQNSF
255 FT NNIKIKGNKMIKVTQCKIEINSIILSENLFKPEIDLTPLYTPLNITKIKTVKHNDINEM
256 FT ISQNNITLYIFMTTVIIILILLYLYLRYVSFNPFMMLYAKLKLRKNQNQNTAQQIEMED
257 FT VPLPLLYPSIPAQV"
258 XX
259 CC Derived from X01472 (g8142) (Rel. 36, Last updated, Version 2).
260 CC Takis Benos and Michael Ashburner, 20-Aug-1997.
261 CC Any changes to original sequence record are annotated in an FT line.
262 XX
263 SQ Sequence 7439 BP; 2985 A; 1512 C; 1048 G; 1894 T; 0 other;
264 AGTGACATAT TCACATACAA AACCACATAA CATAGAGTAA ACATATTGAA AAGCCGCATA 60
265 CGTAAACAAT AAGTGACCAC CATGCTAATG TGGATCAAAT AACAAAAATA TCCACTCTGC 120
266 ATTTTGACAC CCCCATACTG TATGCCATCT GCGCAGTATG CATTCTAATA AACAAATTCT 180
267 TTGACAGCGG CACTTAGCCA TTCTTGTAAA CAAATCTTAA AGTCTGCCTG CTCTCTCTGA 240
268 GGCTTCTCCT CCACTTAAGA ATCCAAGAGC AATGCTCTCC CAAAAACACT AACATATTCT 300
269 TTAAGCAAGC ACAGAGGCTT CTCCTCATTT TCACTTTCAT TTGATTTTCA GTCTTAAGCT 360
270 GAACGTTAAT CAATAAACAA CACAATCGAT ACCGAAATTT TGATTCGTTT TATTTTGGCA 420
271 AAACTCAATT TTCAGCGTTG GTCTTAGTTC ATATTCGGAA CGGTCCATTT AATAGACTCA 480
272 AAACTATTTA TTGCAACCAT TTATTTGCAA TTGGCGCAGT CGATGTGATC AGTGTTAAAG 540
273 TTCCTTGATG CGGTAACCAG ATTTGCCAAT TCCTGTGTTC TTTTTGTTCT CTGACAAAAG 600
274 TACCACGATA ACGGGCACCC ACGTGACGGT TAATATCGCT TTAAGTTTTT AATTAAACCT 660
275 CGACAATAAA GTGAAACCGA AAAATCACAA TTTGCCTAAA CAAACCTGAA TTTATTATCA 720
276 GGAAGACGCT ATTGAATTTG TGAGAGGCTG TAAATCCAAT TGGTTACCTC AAAGACCCAC 780
277 GAAAAAGCTA TAGTGCAACC CTTGCGAAAA TCAAAACCTA TCTTAAAAAA AAAAAAAAAA 840
278 TATAAATAAT AAATTAATAA GCGAAAATTA AAACGTATTA AAAGTAAGAA TAATAAATAA 900
279 ATAAGTGAAA ATTCTATATG ATAAAAATTA AAAATAAGAA TAATAAATAA AAAGACAACA 960
280 TTTTAAATTA AACAATATTA AAAAAATATA AAAATATTAA AAACTATATT AAAAAAAAAA 1020
281 AAAAAACAAA AAAACAAAAA AAAAAAAATA AATAAATAAT CCAAAAATCA AAAATGGCTC 1080
282 AAGAACCAGC AATTGTGCCA CCACTATCAG ACAGCAACAT GACCCAGGTT GCCTACCAGA 1140
283 TTGGCAATGT GGAGAAATTC AACGGTGATC CAGGCTCACT ATACACCTTT GTGAGTCGAA 1200
284 TTGATTACAT ACTGGCTCTT TATGCTACCG GAGATGAACG CCAACAGCAG ATCATATTTG 1260
285 GGCATATTGA ACGCAGCATC AGCGGAGAAG TTATGCGCTG CATTGGAGCC TATGACATGT 1320
286 ACACCTGGCA GCAGCTTAGA AGACAATTGG TACTCAACTA TAAACCCCAG ACCCCTAACC 1380
287 ACGTTCTTTT AGAAGAGTTT CGAAAGACCC CATTTCGAGG CAATGTACGA GCATTCCTGG 1440
288 AAGAAGCAGA AAGCCGCAGA CAAACACTTA CTAGTAAGCT TGAATTAGAG CAAGATCTTG 1500
289 AAGAAAAGAC TTTTTATTTG AAATTAATAA AATCCAGTAT AGAATCACTA ATTGAAAAAT 1560
290 TACCTACACA CATTTATTTA AGAATAAATA ACCACAACAT ACCAGATTTG CGATCACTTA 1620
291 TAAACCTTTT ACAAGAGAAG GGCATGTACG AACAAATAAA TCATACAAGT ACACATGTCC 1680
292 AAAAACAAAA TTTCTCTGAT AAGCCACAAA AGTCCTTTAA TCAAAATACT AATCAGTCTA 1740
293 ACAATATCAG AAAATATCCA ACACCTTTCC TACATTATAA TTCACCAATA CCATATCAAG 1800
294 CTCCACAAAT TTATCAAACA CCACCAACTA ATAACCCACT TTATCGTCAT CCAATACCCT 1860
295 ACCACCCTAA TCCAAACAAT GTTTTTCAAC CAAGCCAACA AAACAATGTT TTCCAACCAA 1920
296 GCCAACAAAA CAATGCTTTT CAACCAAATC AACGAACAAA CTTTACATCT CGACCAATTT 1980
297 TTAACACCAA TCGAAACAAT GCATTCGATC AGAATAGGTT CGGACAACAA CCCCAATATC 2040
298 AAAATCAACA ATCAACACAA AATTCAAGTT CCTATGTACC CAATCGACCA ATAAAACGAT 2100
299 TAAGACCAGC TAATAGTGGA CAGACTGGGA TGAGTGTTGA CGAAACATTA TATCAAGAGG 2160
300 ACGCTTTTTA TCAGCAGTGT GTTCCATATG ACTATTTTTA TTATCCAACT TACGACCATT 2220
301 CAGACTATTA TCCAGAAAAT CAATATCAAA TTGACGAAAA CAACCAAAAT TTACAAAGAA 2280
302 CACAACAGTT ACAGCAGATT AATACAGACG AGACAAACAA TGACAACCAA GAACCCAATG 2340
303 TTGAACAGGC CGAAAATTTT CAGCCACAAG CCTTGGAAAA CCCCAATATA TAACAATTAA 2400
304 ATACAAAGAA AATAATTTGA AATGCCTTAT TGATACCGGA TCAACAGTTA ACATGACATC 2460
305 TAAAAATATA TTTGATTTAC CAATCCAGAA TACTAGTACT TTTATTCATA CCAGCAATGG 2520
306 ACCGCTCATT GTCAACAAAA GTATAATCAT ACCTTCAAAG ATTTTGTTCC CAACAACAAA 2580
307 TGAATTTTTA TTGCACCCTT TCTCTGAGAA TTACGATCTT TTATTAGGAA GAAAACTTTT 2640
308 AGCAGAAGCA AAAGCAACAA TAAGTTACCG CGATCAAGAG GTAACTCTTT ACAACAACAA 2700
309 ATACAAATTA ATAGAAGGAA TAGCAACACA TGAACAGAGT CATTTTCAAA ATGTAAATAT 2760
310 GATACCTGAC ACCATGCTCA GACAGCCAAA TAAAATTTCA CCCATTTTAG AATCAGACCT 2820
311 ATACAGATTG GAACATTTAA ATAACGAAGA AAAACAAAGA TTGTGCGCAC TCCTGCAGAA 2880
312 ATACCATGAC ATACAGTACC ATGAAGGTGA TAAGTTGACA TTTACTAATC AAACCAAACA 2940
313 TACTATCAAT ACAAAGCACA ATCTACCACT TTACTCTAAA TACAGTTACC CACAGGCTTA 3000
314 TGAACAGGAG GTCGAAAGCC AAATACAAGA TATGCTAAAT CAAGGTATTA TACGTACCAG 3060
315 TAATTCACCT TACAATAGCC CCATCTGGGT GGTTCCAAAG AAACAAGATG CATCAGGCAA 3120
316 ACAGAAATTT AGAATTGTAA TAGACTACCG AAAATTAAAT GAAATAACAG TAGGAGACAG 3180
317 ACACCCAATC CCAAACATGG ACGAAATCTT GGGAAAATTG GGCAGATGTA ATTACTTCAC 3240
318 AACTATAGAC TTGGCAAAGG GTTTCCACCA GATCGAAATG GATCCAGAAT CAGTTTCAAA 3300
319 GACAGCCTTT TCTACCAAGC ACGGTCATTA TGAATATTTG CGCATGCCAT TCGGATTAAA 3360
320 AAACGCGCCA GCCACCTTTC AACGGTGCAT GAATGATATT TTAAGACCAC TCTTAAACAA 3420
321 ACACTGTCTT GTGTATTTGG ACGACATAAT TGTATTCTCG ACATCCCTTG ATGAACACCT 3480
322 GCAATCGCTC GGACTAGTTT TCGAAAAATT AGCAAAAGCC AACCTTAAAT TACAACTTGA 3540
323 CAAATGTGAG TTTCTCAAGC AAGAAACCAC ATTTTTAGGA CATGTTCTAA CACCAGATGG 3600
324 AATAAAACCA AACCCTGAAA AAATTGAAGC CATTCAAAAA TATCCAATTC CCACTAAACC 3660
325 AAAAGAAATA AAAGCTTTTC TTGGACTGAC AGGATATTAT CGTAAATTTA TTCCAAACTT 3720
326 TGCAGACATA GCCAAACCCA TGACTAAGTG TTTAAAAAAG AACATGAAAA TTGACACTAC 3780
327 CAACCCAGAA TATGACTCTG CATTTAAAAA ATTAAAATAT CTAATATCAG AAGACCCAAT 3840
328 TCTTAAAGTA CCCGACTTTA CAAAGAAATT CACTTTAACC ACAGACGCAA GTGATGTCGC 3900
329 TTTGGGGGCA GTACTGTCAC AAGATGGACA CCCACTTAGC TACATTAGCC GAACACTTAA 3960
330 TGAACACGAA ATAAATTACA GCACAATTGA AAAAGAACTC TTAGCAATTG TATGGGCGAC 4020
331 AAAGACTTTT CGACACTACC TACTTGGAAG ACACTTTGAA ATATCCAGTG ACCATCAACC 4080
332 ATTGAGCTGG TTGTACCGTA TGAAAGACCC AAATTCAAAA CTGACCCGAT GGAGAGTAAA 4140
333 ATTATCCGAA TTCGATTTTG ATATAAAATA TATAAAAGGA AAAGAAAATT GCGTGGCGGA 4200
334 TGCTCTGTCC AGAATAAAAC TTGAGGAGAC ATATTTGAGC GAACAAACCC AACATAGTGC 4260
335 AGAAGAGGAC AATAGTGATT TAATTTTTAT TACAGAAAGA CCTCTAAATA CATTTAACAG 4320
336 ACAAGTTATA TTTTCAAAAG GACCACCAGA CATTAAAGTT ACGAAATATT TCAAAAAACA 4380
337 CATCACCCAA ATATTTTACG ACATTATGAC CAGGGAAAAA GCCGAACAAT ATTTGATAGA 4440
338 CCATTTTTGT GGTAAGAAAA GTGCGTTGTA TATTGAGAGT GACGCTGATT TCGAAGTCAT 4500
339 TCAAGCCGCA CATAAATTAG CCATAAACAC CAAATATACA AAAATCCTGC GTAGCACGAT 4560
340 TTTGTTAAAA AACATAACCA CTTATGCGGA ATTTAAGGAA TTGATCTTGA CTGCTCATGA 4620
341 AAAACTTCTA CACCCAGGCA TACAGAAAAC TACTAAACTT TTCGGAGAAA CTTACTATTT 4680
342 CCCTAATAGC CAGCTACTTA TTCAGAATAT AATAAATGAG TGCAGTATTT GCAATCTGGC 4740
343 AAAAACAGAG CACCGAAATA CAGACATGCC AACGAAAACC ACACCCAAAC CAGAACATTG 4800
344 CCGCGAAAAA TTCATGATAG ACATTTACTC ATCCGAAGGC AAACATTACG TTAGTTGCAT 4860
345 AGACATTTAT TCGAAATTTG CCACATTAGA AGAAATAAAA ACAAAAGACT GGATAGAATG 4920
346 CAAAAACGCG CTTATGCGCA TATTCAACCA GCTTGGCAAG CCAAAGTTAC TAAAGGCGGA 4980
347 CAGAGACGGC GCATTTTCCA GTTTAGCCCT CAAGAGATGG CTGGAGAGTG AGGAAGTCGA 5040
348 ATTGCAGCTT AACACAACAA AAACTGGTGT GGCGGACATA GAAAGACTAC ATAAAACAAT 5100
349 TAATGAAAAG ATTCGCATAA TCAAAACATC CGATGACGAA GAAACCAAAT TGAGCAAAAT 5160
350 GGAAACAGTA CTTAACATAT ACAATCATAA AACCAAACAC GACACCACTG GACAGACCCC 5220
351 TGCACACATA TTTCTCTACG CTGGACAACC AATATTAGAT ACCCAACAAA ACAAAGAAAA 5280
352 CAAAATAAAC AAAATAAATA ATGACAGAGT GGAGTACGAA GTCGACACAA GATACAGAAA 5340
353 AGGTCCACTA CAGAAAGGCA AATTAGAAAA TCCTTTTAAG CCAACAAAAA ATGTGGAGCA 5400
354 GACTGACTCT GATCATTATA AAATTACTAA TAGAAATAGA ATTACTCACT ACTACAAAAC 5460
355 ACAATTCAAA AAACGAAAGA AAAATAATCA GCTCTCAATT TCACAGGCAC CTGGCACTTG 5520
356 ATAACATTGC TGCTGATGCT GATCACAACA GTTCATGGAC AACAAATTGA AATTAATAAT 5580
357 ATTGACACAA ACCACGGATA TCTCCTTTTT TCTGATAAAC CAGTCCAGAT ACCATCATCC 5640
358 TTTGAACATC ATTGCTTGAG AATCAATTTA ACTGAAATAG ACACCATAGC TGATTATTTT 5700
359 GAGCAAAGAC TACGTACCGA CTACCATGCA CCCCAGGTCA AATTTTTATA CAACAAAATG 5760
360 AGAAGAGAAC TAGCTGGAAT AGCCTTGCGA CATAGAAATA AACGGGGACT TATTAACATT 5820
361 GTAGGTTCAG TTTTTAAATA CCTATTTGGC ACACTTGACG AAAATGATCG AGTGGATATA 5880
362 CAGAGGAAAC TTGAAACAAA CGCCCATAAC TCGGTAAATT TACATGAACT CAATGACGCT 5940
363 ATTCAATTAA TAAATGACGG AATGCAAAAG ATACAGAATT ATGAAAACAA CAGCAACATC 6000
364 ATTAACAGTC TTTTATATGA ACTCATGCAG TTTACAGAAT ACATAGAAGA TGTGGAAATG 6060
365 GGAATGCAGC TTTCCAGACT CGGTCTATTT AATCCCAAAC TACTAAACTA CGATAAACTT 6120
366 GAGAATGTAA ACAGCCAAAA TATTTTAAAC ATTAAAACAT CCACTTGGAT TAATTACAAT 6180
367 GATAACCAAT TATTAATCAT ATCTCACATA CCTATTAACT TTTCATTAAT AAATACAGTA 6240
368 AAAATAATCC CTTACCCAGA CTCGAACGGC TATCAGCTAG AATACACAGA CACACAATCA 6300
369 TATTTTGAAA GAGAAAATAA AGTTTACAAT AACGAAAATA AAGAAATAAA CAATGAGTGT 6360
370 GTCACCAACA TTATTAAACA TTTAAAACCA ATTTGTAATT TTGAGTCAAT CCACACAGAT 6420
371 GAAATAATAA AATACATAGA ACCAAACACA ATTGTAACCT GGAATTTAAC CCAAACAAGT 6480
372 CTCAAACAAA ATTGTCAAAA TTCATTTAAT AATATAAAAA TAAAAGGAAA CAAAATGATA 6540
373 AAAGTAACCC AATGTAAAAT AGAAATCAAT AGCATAATTC TAAGTGAAAA TCTCTTTAAA 6600
374 CCAGAAATAG ATTTGACACC ATTATACACA CCACTTAACA TAACAAAAAT AAAAACTGTT 6660
375 AAACACAACG ACATTAATGA AATGATTTCA CAAAACAATA TTACACTTTA CATATTTATG 6720
376 ACTACTGTCA TCATTATACT TATTTTATTG TACTTATATT TAAGATACGT ATCATTTAAC 6780
377 CCATTCATGA TGCTGTATGC AAAACTAAAA TTAAGAAAAA ATCAAAATCA AAACACAGCA 6840
378 CAACAAATAG AAATGGAAGA CGTTCCATTA CCCCTACTAT ATCCATCAAT CCCAGCCCAA 6900
379 GTATAGGCTT CTCTTTAAGG GAAGGGAAGT GACATATTCA CATACAAAAC CACATAACGT 6960
380 AGAGTAAACA TATTGAAAAG CCGCATACGT CAACAATAAG TGACCACCAT GCTAATGTGG 7020
381 ATCAAATAAC AAAAATATCC ACTCTGCATT TTGACACCCC CATACTGTAT GCCATCTGCG 7080
382 CAGTATGCAT TCTAATAAAC AAATTCTTTG ACAGCGGCAC TTAGCCATTC TTGTAAACAA 7140
383 ATCTTAAAGT CTGCCTGCTC TCTCTGAGGC TTCTCCTCCA CTTAAGAATC CAAGAGCAAT 7200
384 GCTCTCCCAA AAACACTAAC ATATTCTTTA AGCAAGCACA GAGGCTTCTC CTCATTTTCA 7260
385 CTTTCATTTG ATTTTCAGTC TTAAGCTGAA CGTTAATCAA TAAACAACAC AATCGATACC 7320
386 GAAATTTTGA TTCGTTTTAT TTTGGCAAAA CTCAATTTTC AGCGTTGGTC TTAGTTCATA 7380
387 TTCGGAACGG TCCATTTAAT AGACTCAAAA CTATTTATTG CAACCATTTA TTTGCAATT 7439
388 //