Repository 'msp_fasta_tabular_converter'
hg clone https://toolshed.g2.bx.psu.edu/repos/drosofff/msp_fasta_tabular_converter

Changeset 0:951cb6b3979b (2015-06-21)
Next changeset 1:2f7278120be9 (2016-03-22)
Commit message:
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
added:
fasta_tabular_converter.py
fasta_tabular_converter.xml
test-data/input.fa
test-data/output.faw
test-data/output.tab
b
diff -r 000000000000 -r 951cb6b3979b fasta_tabular_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_tabular_converter.py Sun Jun 21 14:28:49 2015 -0400
[
@@ -0,0 +1,88 @@
+#!/usr/bin/python
+#
+import sys
+from collections import defaultdict
+
+def readfasta_writetabular(fasta, tabular):
+  F = open(fasta, "r")
+  for line in F:
+    if line[0] == ">": continue
+    else:
+      seqdic[line[:-1]] += 1
+  F.close()
+  F = open(tabular, "w")
+  for seq in sorted(seqdic, key=seqdic.get, reverse=True):
+    print >> F, "%s\t%s" % (seq, seqdic[seq])
+  F.close()
+    
+        
+def readtabular_writefasta(tabular, fasta):
+  F = open(tabular, "r")
+  Fw = open(fasta, "w")
+  counter = 0
+  for line in F:
+    fields = line.split()
+    for i in range(int(fields[1])):
+      counter += 1
+      print >> Fw, ">%s\n%s" % (counter, fields[0])
+  F.close()
+  Fw.close()
+
+def readtabular_writefastaweighted (tabular, fasta):
+  F = open(tabular, "r")
+  Fw = open(fasta, "w")
+  counter = 0
+  for line in F:
+    counter += 1
+    fields = line[:-1].split()
+    print >> Fw, ">%s_%s\n%s" % (counter, fields[1],  fields[0])
+  F.close()
+  Fw.close()
+
+def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
+  F = open(fastaweigthed_input, "r")
+  number_reads = 0
+  for line in F:
+    if line[0] == ">":
+      weigth = int(line[1:-1].split("_")[-1])
+      number_reads += weigth
+    else:
+      seqdic[line[:-1]] += weigth
+  F.close()
+  F = open(fastaweigthed_reparsed, "w")
+  n=0
+  for seq in sorted(seqdic, key=seqdic.get, reverse=True):
+    n += 1
+    print >> F, ">%s_%s\n%s" % (n, seqdic[seq], seq)
+  F.close()
+  print "%s reads collapsed" % number_reads
+
+def readfastaeighted_writefasta(fastaweigthed, fasta):
+  F = open(fastaweigthed, "r")
+  Fw = open(fasta, "w")
+  counter = 0
+  for line in F:
+    if line[0] == ">":
+      weigth = int(line[1:-1].split("_")[-1])
+    else:
+      seq = line[:-1]
+      for i in range (weigth):
+        counter += 1
+        print >> Fw, ">%s\n%s" % (counter, seq)
+  F.close()
+  Fw.close()
+
+
+seqdic = defaultdict(int)
+option = sys.argv[3]
+
+if option == "fasta2tabular":
+  readfasta_writetabular(sys.argv[1], sys.argv[2])
+elif option == "tabular2fasta":
+  readtabular_writefasta(sys.argv[1], sys.argv[2])
+elif option == "tabular2fastaweight":
+  readtabular_writefastaweighted (sys.argv[1], sys.argv[2])
+elif option == "fastaweight2fastaweight":
+  readfastaeighted_writefastaweighted(sys.argv[1], sys.argv[2])
+elif option == "fastaweight2fasta":
+  readfastaeighted_writefasta(sys.argv[1], sys.argv[2])
b
diff -r 000000000000 -r 951cb6b3979b fasta_tabular_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_tabular_converter.xml Sun Jun 21 14:28:49 2015 -0400
b
@@ -0,0 +1,75 @@
+<tool id="fasta_tabular_converter" name="fasta - tabular" version="1.0.2">
+  <description>conversions</description>
+  <command interpreter="python">fasta_tabular_converter.py $input $output $switch.conversionType</command>
+  <inputs>
+    <conditional name="switch">
+       <param name="conversionType" type="select" label="conversion option">
+          <option value="fasta2tabular" selected="true">fasta to tabular</option>
+          <option value="tabular2fasta">tabular to fasta</option>
+          <option value="tabular2fastaweight">tabular to weighted fasta</option>
+          <option value="fastaweight2fastaweight">recompile weighted fasta to catenated fasta weighted</option>
+          <option value="fastaweight2fasta">fasta weighted to fasta</option>
+       </param>
+    <when value="fasta2tabular">
+       <param name="input" type="data" format="fasta" label="fasta file to convert to tabular"/>
+    </when>
+    <when value="tabular2fasta">
+       <param name="input" type="data" format="tabular" label="tabular file to convert to fasta"/>
+    </when>
+    <when value="tabular2fastaweight">
+       <param name="input" type="data" format="tabular" label="tabular file to convert to fasta weighted"/>
+    </when>
+    <when value="fastaweight2fastaweight">
+       <param name="input" type="data" format="fasta" label="catenated fasta weighted to recompile" help="Use this option only if you know what you're doing"/>
+    </when>
+    <when value="fastaweight2fasta">
+       <param name="input" type="data" format="fasta" label="fasta weighted file to convert to fasta"/>
+    </when>
+   </conditional>
+
+   </inputs>
+
+ <outputs>
+   <data format="fasta" name="output" label="${switch.conversionType} conversion">
+     <change_format>
+        <when input="switch.conversionType" value="fasta2tabular" format="tabular"/>
+        <when input="switch.conversionType" value="fastaweight2fastaweight" format="fasta"/>
+        <when input="switch.conversionType" value="fastaweight2fasta" format="fasta"/>
+     </change_format>
+   </data>
+</outputs>
+
+    <tests>
+        <test>
+            <param name="conversionType" value="fasta2tabular" />
+            <param ftype="fasta" name="input" value="input.fa" />
+            <output file="output.tab" name="output" />
+        </test>
+        <test>
+            <param name="conversionType" value="tabular2fastaweight" />
+            <param ftype="tabular" name="input" value="output.tab" />
+            <output file="output.faw" name="output" />
+        </test>
+    </tests>
+
+
+<help>
+
+**What it does**
+
+Converts fasta files to tabular files with the sequence in the first column and the number of occurrences of the sequence in the second column, and vice versa.
+
+This format is suitable for storage of sequence datasets in the data library, and will be used in the future.
+
+Regeneration of the original fasta file from the tabular format is ensured by the same tool
+
+This tool also handles a "weighted" fasta format with headers such as:
+>id_numberofreads
+ATGCATGACCAGATAGGAC
+etc...
+
+with generation of the "weighted" format from a tabular format, and recompilation of concatenated weighted fasta files.
+
+</help>
+
+</tool>
b
diff -r 000000000000 -r 951cb6b3979b test-data/input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa Sun Jun 21 14:28:49 2015 -0400
b
@@ -0,0 +1,500 @@
+>1
+TAGTTACCTTCATATCTCTCTTTA
+>2
+TCTATTCATACAAAACACTAATACCC
+>3
+ACAACCTCAACTCATATTT
+>4
+TATAATTTTATTTTATATTTTCTCT
+>5
+TCTTCTATATAATCCTTTATTATAA
+>6
+TAAAATAAACCAAAACCCAAAAAT
+>7
+AATCTACAATTTCCATTACGACTCC
+>8
+TTTCCGACAAATACACCATCTTC
+>9
+CAGATTCACTGATTTTCTTACGCC
+>10
+CAAGAATACAAAAAATACTAATTA
+>11
+AACTCTCTAATTTAACTTTGTGC
+>12
+AAAAACACACAGACACAAGCAGCAAT
+>13
+TTACTAGATCCACCCTCATTA
+>14
+ATGTTATTTACTTTTTCCCCTTATA
+>15
+CGATATTTTCTCCTCTTACC
+>16
+TACAGAGAAATATACAACACTCACC
+>17
+ATGAAATTCGAACAATACGTC
+>18
+GAGAATAAATATTTCAATGGTCTATTG
+>19
+TGCTTTTACTACATATTTTTTATTTTTTTA
+>20
+CATACCTTAAATTATCTCTTTCTT
+>21
+TTTGTTTTTCATTTTTTTATCTTT
+>22
+TTATTATCTATTTTAATTTTTCTTAA
+>23
+TATTATCATCTCGTTCTTCCTTCTC
+>24
+TCAACTGGCAAGAATTTTTGAAAATT
+>25
+TACAAATTTTTTTTTCTTTCTTAT
+>26
+TTTTCTTTTTTTCATTTTCTCTTTTA
+>27
+AAGATGGAGTAGTTTTTT
+>28
+TCTCATCTTACAATTTTTTAAAACTT
+>29
+CATACCTACAAAAAAGCTTCTCTTAC
+>30
+TTTTATATTTCCTTATATCTTTACTA
+>31
+GGAGATTGTAGAACGAAAGGAAAAT
+>32
+TCTATTTCTTTATTTTTTTTATTAT
+>33
+CGGACGGTATATTTTTTAATATAA
+>34
+TTCTTGGACTACACATTTTTTATTGTTTTA
+>35
+TACTATATACTTCTTCAAATCACA
+>36
+ATTTTACTTCATCATTTTC
+>37
+TATTTCCAACCTTCAACCTCAAATAA
+>38
+CACGACTTTATTCTTTTTATCTCA
+>39
+TTCTTTTACTACATATTTTTTATTTTTTTA
+>40
+TTTATTACAACCCTATCTTACCTCAA
+>41
+CGATATTTTCTCCTCGTACC
+>42
+TAATTACCATTGCTAACTATCCA
+>43
+CATTAATTCATCCATTTAAACTAA
+>44
+GAAACAAACAACACATACCCTCTGGC
+>45
+TACTTTTTTCTTAATTTTTTATTAAAC
+>46
+TAACTTTAACTTTTTTACT
+>47
+TTCTTTTACTACATATTGTTTATTTTTTTA
+>48
+TCTAGTCTGAGCGTAGTACCAGATTG
+>49
+TTTTTTATCAATTTTCACCATTCAT
+>50
+AATGACACACTCTTCATCAAC
+>51
+TAACATAAATTTTAATCATAAATTG
+>52
+TCTATATTATTTTTATCAATTTTCACC
+>53
+TCCCAACCCTCGAGCATCATTTTC
+>54
+TAGTCATACATACCTAATTATACATA
+>55
+TACAAAAAATGCGAAAATTGACCCT
+>56
+GAGAACTTTTAATCATTTTAC
+>57
+TCTTATTTTAATCTTCCAATTTC
+>58
+CGGCACATGTTGAATTACACTCA
+>59
+CAGTTTCACAAAAGATCTTTTAA
+>60
+GCCAACGACCATACCACGA
+>61
+CAAATAACAAACTGAATAAACGAAA
+>62
+TAGTTTCACTACTTTATTCTTTTTA
+>63
+TGAGGAAAACAGAAAAATGAGAGACA
+>64
+TATATAAATCTTCAACATCAA
+>65
+TGATTTACTTACATTCTTTTTTT
+>66
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+>67
+TACTGAAAACGGGCGCATATCAGTGG
+>68
+TATTCAATCACTCCATTATATATAACA
+>69
+TATATTGCCTCCCCATAATCCTT
+>70
+TCGCCGTAAAGCCAGTCGTTCTCC
+>71
+TTTAAACACTTCCTACATCAAATTTC
+>72
+TTTGTTTTTTACTATATTT
+>73
+TCTTTTTTTTAATACTTATTTTCATT
+>74
+TAAGTTTTTAATCATTTTTTTT
+>75
+TGAGAATGACTTCTTCACGATCTCTT
+>76
+AATTTATTTAATTTATATTCTAACTAA
+>77
+AAAAAATATCTTTTTTAACTCGTGGCC
+>78
+AAGACAACAATGACATATAAGACG
+>79
+TGAGCTAGAACTGCACCCACTCCA
+>80
+ACTAAACTTTTCTTACCATATTTCTA
+>81
+TATTTCACTTTATACTTCCTTAA
+>82
+TATCTATCTTTGATCTTCTTTTCA
+>83
+TTTTTATCAATTTTCACCATTC
+>84
+TAAAAATAATTGTCTTTAATTTCA
+>85
+TTAAAGACGCAACAACTAACATT
+>86
+TAACCTTGCAGAACTATACGATTCAAA
+>87
+TACTATTTTATTATACATACATACATTA
+>88
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+>89
+AATGTCACTTGAAGAATTCACGT
+>90
+TAATAAGAAACTGTTCAAACAATCCAC
+>91
+AAGCACGCCTTACCACAATTTATAA
+>92
+CCATATATGACTGACTCATTTCAC
+>93
+TTAATATGTAATTTCATACCTCAC
+>94
+AGGATTTTTAAGCCCATATGTTTCC
+>95
+ACAGCAGGACGGTGATCA
+>96
+TGATGACGGGCAGCAGGGATTTTC
+>97
+TTGTAAAACATTCTTTCTCCTGAC
+>98
+TTCTTCCTTTTATCCTCTCTTAA
+>99
+ATTATTAATAAATTATTATAA
+>100
+CTGGAAACTATTGATCAAATT
+>101
+TACAACTAACATCCTTTCTTCTTCC
+>102
+TCAAATGCAAATTGGATTTATGA
+>103
+TCCTGAGGACGAGGGGCGTTTAGC
+>104
+TACACAGACTTACAAAACACATCCTTC
+>105
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+>106
+TTAAACAATTTGGAATTAATT
+>107
+TAGCCTTTACTAGGCTTTTTCTAA
+>108
+CGATATTTTCTCCTCTTACCT
+>109
+TGGAATGTAAAGAAGTATGGAG
+>110
+TTTTTAACTCCCATCATTTTTCCTC
+>111
+TTTTTTATCATTTTTCACCTAAAAAA
+>112
+GAACAATTTTTCAATTTTTTACATTA
+>113
+TATGGATTATTTCAAAATTTTTTTTT
+>114
+TAACGGAGCACGAGAACGAAGTGG
+>115
+CAATTTTTAATTCCTTTTTTCTTCTT
+>116
+AGATGTTGATCTAAACTCTCCCA
+>117
+TGAGCGGAGAACCAGAGTTGATGAGC
+>118
+TAACAAATAGAACGTTCTAATTTAAA
+>119
+CTAGACAAGATGCTATAAATTTTAAA
+>120
+TTTTCTTTACCCATCTTTACTTTCCC
+>121
+TACACACTCATCAACCAAAGGACG
+>122
+TCTTATTATCATTTTTTTATCCCTT
+>123
+TCAGAGTTCTACAGGTCCTACGATT
+>124
+TTTATTACTTAGTCATAATTCCAA
+>125
+GCCGGGGCGTGAGATGTCTGCATTA
+>126
+GACGAAACGCAACAACAAAATGGACG
+>127
+TAGACTTTCTACTCATTATTAC
+>128
+AAATTGCAAAGATGGAAAATAAAACT
+>129
+CCTTACTCAACATACTTAATCATACTTA
+>130
+TGCTTGGACTACATATGGTTGAGTG
+>131
+GAATGATCGCACCACCACCTCAACGTT
+>132
+TCCACCTATTTATCTTTTCTT
+>133
+TGGACAAGAACCACGCGACGGGTGT
+>134
+CAAGATATGAACAAAGCAAAGACAC
+>135
+CAAACGGAACAAGACATCACCATC
+>136
+NATTCTTACTCCATTTCAATTTACT
+>137
+TAGAACTCGAACCAGAGCTCC
+>138
+CGGAAAAGAATGTAGACCATTTAA
+>139
+TACAAACGGAACTTTCGTCATAA
+>140
+GGTATCTTTATATTTTAATTTTCTT
+>141
+TATTCCGACAATACCTTCTTTAC
+>142
+AACTTTAAATTTTTAATAACCTT
+>143
+CATAATATAAACTTATCTT
+>144
+TATTTATAAATTTTTTCTTGAGAC
+>145
+TTTTTTTTTTGTTTTTATTTTTATCAT
+>146
+TATTATACATAGAATAACAAATCTTT
+>147
+TGGAGTAGCACAGTCGTCTGAAATC
+>148
+TATTTCTTTTTTAACTTCTTTTC
+>149
+TTATAATCACGGCACCCTATACA
+>150
+TTCTTTTACTACATATTTTTTATTTTT
+>151
+TAGCGAGATGGACCAACGTGCTGT
+>152
+CCAGAAAACAATACAACATCCTCA
+>153
+TCCGAAAACAAGGCCCGTCGCT
+>154
+TACTCAATAGAACTCTACTCACTCATA
+>155
+AACGGGGAATAAGGGTTCG
+>156
+TCAGTCTTTTTTTCTCTCCTA
+>157
+AATATAAAAATACAATCAACCATTGCA
+>158
+GGACGGAGAACTGATAAGGGCA
+>159
+TAAAGAAGAAGAATTGATTTTAAT
+>160
+TCATTACACTTCTTACAAAAC
+>161
+CCGCGATCTGCTTATTTATAATCTT
+>162
+TCTAAACACCCACGAAAATCTCTTAC
+>163
+AACAGGAAAAACAGAAGGATTTCTA
+>164
+TCTCTTTTATTTTTATCTTTCCTT
+>165
+AACATTTTATCAATTATACATTA
+>166
+GCAGATAGAAATCAATACAAAAATC
+>167
+TTAATGACACACGGGAAAAACACCG
+>168
+TACAGACAACACATACGGACTTAAGT
+>169
+TCCACAACAACTCTATCTAAAGCATT
+>170
+ATAAAGCTAGATTACCAAAGCAT
+>171
+TACCTCTTTATTAACCTCCACCTCTA
+>172
+TACACCTCTTTTTACTTTTTTATT
+>173
+CACCGAACCGGGAAGGCGAACAAC
+>174
+TAGGTACTTACCTTTTTTTTACACAA
+>175
+AGGAATATGATGAAATAAAAAAAT
+>176
+TATTATTTTTTTATTCCATTCATAT
+>177
+TAAAATAAATAAGTCCGACGACAA
+>178
+TCTGTATTTGACTTATTACTTTCTCC
+>179
+AAGGAATTAAAGCAATAATTCTAA
+>180
+TTCATTTTATTTTTAAATATCTTTTTT
+>181
+TTAGTATTAATCTTCACTTAA
+>182
+TATGGAGAAACAGCGATATAAGTCA
+>183
+CCCGAAAAGCCGAGGACGACTTA
+>184
+CACAGACTGAGGCAGAAAAAACAA
+>185
+TCAAGCCTTTTGAAGAACTGACCTAAA
+>186
+TAAGAAACTGAGCTAACGCAATGTACC
+>187
+CGATATTTTCTCCTCTGACC
+>188
+TAACTACTTTTACATTAATACTAA
+>189
+ACCAGCACCTTCCGACTCAACGTCAAA
+>190
+TCAAAGAACAATGTAAAGCCGCGAC
+>191
+TTGATTCTTCTTTTTCACAAAA
+>192
+TACAAAACAAACAAATTACAATCTAAA
+>193
+GAAACCATTATCTTATCTTTATACA
+>194
+CTATATTTTCTCTCTTACC
+>195
+TCAGAAGAACAGAGAATTGATTTT
+>196
+TAATAAATTATTAAATAAAAAAAAAA
+>197
+CCGACCGAGCAAATAAACACAGGAACG
+>198
+AAGTGAAGAAGTAGTTTTT
+>199
+TGGACGGAGAACTGATAAGGGC
+>200
+TGCTTGGACTACATATGGTTGAGGG
+>201
+TACAAGACTAAAACAAACGTGAAGT
+>202
+TGAAACTGAAACTAACATACAAAATATT
+>203
+TATCTGATCAACAATCTTTTCCCAT
+>204
+TTTAAGACTTATGAGCTTG
+>205
+CAAGGCTCAGAAGAACATCACCAAGACC
+>206
+TTCAAGTAGATTGCATTTTTTAATA
+>207
+CGCAACCAGCAGCAACTCCTAGCAT
+>208
+TACAAACGGAACTTTCTTCATAACTTC
+>209
+ACAAATCATAAATTTTTTTTTACT
+>210
+TCCGAAAAATCGTAGGACCCGGGCA
+>211
+CGCCGCAAGATGAATACTCTAATGA
+>212
+TGACCAAAGACAAACAAACAATAAATA
+>213
+AAAAGGAAAAACAGAAAAATTGGG
+>214
+TTCTTGATAACGCATCTTCTACAT
+>215
+TACTGAAACAAGGAAACACAAGC
+>216
+TCAAAAAGTAATAGGGATCGTTA
+>217
+TGGAATGTAAAGAAGTATGGAG
+>218
+TAAAATTGTAATATTTAAATAATAT
+>219
+GAGGATTAAAAGAACGGTTTATAA
+>220
+TTCTTTGACTACATATTTTTTATT
+>221
+TACAAACGTAATTTTCGCATAACATC
+>222
+AGAACAATTAAATAAAATAGCATA
+>223
+TAATAATTTAAATAAATATAAATTT
+>224
+AATTGCAACAGAGACTGGAA
+>225
+TTAAGTTTTAGACATAATCTATTACAA
+>226
+TGAAAGGAAAAACAGGACACGGGA
+>227
+AACAGGGAGATCAACAGCGTTGACA
+>228
+GGACGGAGAACTGATAATGGC
+>229
+TGTGTAATCTTTCTACTTCTTCTAC
+>230
+TTTACCAGAGGAGTCGAGTTTTT
+>231
+AAAATCGACTGCCGAAAACATTTTAA
+>232
+TGCTTGGACTACATATGGTTGAGTG
+>233
+TACAGACAACACATACGGACTTAA
+>234
+GAAGAGGAGGAGGAGTTTGTAAG
+>235
+TTACAATCTACTATTCTTTTATTA
+>236
+TTATTACATCGTCCACATATAACAAAA
+>237
+TTTTTAACTCATTTTACAATTAAAC
+>238
+AAAACCCGGACAAACCATCGGAGGA
+>239
+CAACACATGACGCGACAATTCTTG
+>240
+TACACACTCATCAACCAAAGTACGTA
+>241
+TTTTCATTTCTTCTTCAAATCCTTT
+>242
+AACAACTGCAAACATCTACCACA
+>243
+TAGTTTTAAATATTTCTTTTTTTC
+>244
+AAAGACAAAAGAAATACAGGCACT
+>245
+TTTGATACCTTTATACCATACCTATT
+>246
+ACAGCAGGACGGTGATCA
+>247
+TTCTAGCACAACACGCACACATATA
+>248
+TAATCAAAAAACTCTTCATTTTTA
+>249
+TAGGAACTTCATACCGGTCTC
+>250
+TATTTCAGCAACAGACTAAGACTAA
b
diff -r 000000000000 -r 951cb6b3979b test-data/output.faw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.faw Sun Jun 21 14:28:49 2015 -0400
b
b'@@ -0,0 +1,494 @@\n+>1_2\n+ACAGCAGGACGGTGATCA\n+>2_2\n+TGCTTGGACTACATATGGTTGAGTG\n+>3_2\n+TGGAATGTAAAGAAGTATGGAG\n+>4_1\n+AATTTATTTAATTTATATTCTAACTAA\n+>5_1\n+TTATAATCACGGCACCCTATACA\n+>6_1\n+TCCGAAAAATCGTAGGACCCGGGCA\n+>7_1\n+TCCCAACCCTCGAGCATCATTTTC\n+>8_1\n+TTGATTCTTCTTTTTCACAAAA\n+>9_1\n+AGAACAATTAAATAAAATAGCATA\n+>10_1\n+CCAGAAAACAATACAACATCCTCA\n+>11_1\n+TCTAGTCTGAGCGTAGTACCAGATTG\n+>12_1\n+TTTTTAACTCCCATCATTTTTCCTC\n+>13_1\n+GCAGATAGAAATCAATACAAAAATC\n+>14_1\n+AAGTGAAGAAGTAGTTTTT\n+>15_1\n+AATGTCACTTGAAGAATTCACGT\n+>16_1\n+TTTACCAGAGGAGTCGAGTTTTT\n+>17_1\n+GGTATCTTTATATTTTAATTTTCTT\n+>18_1\n+TTACTAGATCCACCCTCATTA\n+>19_1\n+TATATAAATCTTCAACATCAA\n+>20_1\n+GAAACCATTATCTTATCTTTATACA\n+>21_1\n+GGAGATTGTAGAACGAAAGGAAAAT\n+>22_1\n+ACTAAACTTTTCTTACCATATTTCTA\n+>23_1\n+TCAAGCCTTTTGAAGAACTGACCTAAA\n+>24_1\n+TAACATAAATTTTAATCATAAATTG\n+>25_1\n+TAGCGAGATGGACCAACGTGCTGT\n+>26_1\n+TACAAACGTAATTTTCGCATAACATC\n+>27_1\n+CGGAAAAGAATGTAGACCATTTAA\n+>28_1\n+GAAAGGAAGGGAAGAAAGCGAAAGGA\n+>29_1\n+CAAGAATACAAAAAATACTAATTA\n+>30_1\n+CATACCTACAAAAAAGCTTCTCTTAC\n+>31_1\n+TAAAATAAATAAGTCCGACGACAA\n+>32_1\n+AGGAATATGATGAAATAAAAAAAT\n+>33_1\n+TTTTTTTTTTGTTTTTATTTTTATCAT\n+>34_1\n+TTTTTAACTCATTTTACAATTAAAC\n+>35_1\n+CCGCGATCTGCTTATTTATAATCTT\n+>36_1\n+TAGGTACTTACCTTTTTTTTACACAA\n+>37_1\n+TCATTACACTTCTTACAAAAC\n+>38_1\n+TATTATACATAGAATAACAAATCTTT\n+>39_1\n+ATGTTATTTACTTTTTCCCCTTATA\n+>40_1\n+TCCGAAAACAAGGCCCGTCGCT\n+>41_1\n+TGCTTTTACTACATATTTTTTATTTTTTTA\n+>42_1\n+GAACAATTTTTCAATTTTTTACATTA\n+>43_1\n+AACATTTTATCAATTATACATTA\n+>44_1\n+TATCTGATCAACAATCTTTTCCCAT\n+>45_1\n+AATTGCAACAGAGACTGGAA\n+>46_1\n+TATTCAATCACTCCATTATATATAACA\n+>47_1\n+CAAACGGAACAAGACATCACCATC\n+>48_1\n+TCTTCTATATAATCCTTTATTATAA\n+>49_1\n+TGATGACGGGCAGCAGGGATTTTC\n+>50_1\n+TTTATTACAACCCTATCTTACCTCAA\n+>51_1\n+AACAGGAAAAACAGAAGGATTTCTA\n+>52_1\n+CGATATTTTCTCCTCGTACC\n+>53_1\n+AAATTGCAAAGATGGAAAATAAAACT\n+>54_1\n+TACACACTCATCAACCAAAGGACG\n+>55_1\n+TATATTGCCTCCCCATAATCCTT\n+>56_1\n+TACAAACGGAACTTTCTTCATAACTTC\n+>57_1\n+GGA
CGGAGAACTGATAATGGC\n+>58_1\n+TTCTTTGACTACATATTTTTTATT\n+>59_1\n+TCTTTTTTTTAATACTTATTTTCATT\n+>60_1\n+TACTTTTTTCTTAATTTTTTATTAAAC\n+>61_1\n+AATATAAAAATACAATCAACCATTGCA\n+>62_1\n+CAGTTTCACAAAAGATCTTTTAA\n+>63_1\n+GAAACAAACAACACATACCCTCTGGC\n+>64_1\n+AATGACACACTCTTCATCAAC\n+>65_1\n+AAGATGGAGTAGTTTTTT\n+>66_1\n+ACAACCTCAACTCATATTT\n+>67_1\n+TTAAACAATTTGGAATTAATT\n+>68_1\n+TACAAAAAATGCGAAAATTGACCCT\n+>69_1\n+GGACGGAGAACTGATAAGGGCA\n+>70_1\n+TTAAGTTTTAGACATAATCTATTACAA\n+>71_1\n+TTATTATCTATTTTAATTTTTCTTAA\n+>72_1\n+CACCGAACCGGGAAGGCGAACAAC\n+>73_1\n+TCCACCTATTTATCTTTTCTT\n+>74_1\n+TCGCCGTAAAGCCAGTCGTTCTCC\n+>75_1\n+TCAAAGAACAATGTAAAGCCGCGAC\n+>76_1\n+TGGACAAGAACCACGCGACGGGTGT\n+>77_1\n+CCCGAAAAGCCGAGGACGACTTA\n+>78_1\n+TTTTTTATCATTTTTCACCTAAAAAA\n+>79_1\n+TAGAACTCGAACCAGAGCTCC\n+>80_1\n+TCTATATTATTTTTATCAATTTTCACC\n+>81_1\n+TCTATTTCTTTATTTTTTTTATTAT\n+>82_1\n+TTTGATACCTTTATACCATACCTATT\n+>83_1\n+ATAAAGCTAGATTACCAAAGCAT\n+>84_1\n+GCCAACGACCATACCACGA\n+>85_1\n+CGGCACATGTTGAATTACACTCA\n+>86_1\n+TACTATTTTATTATACATACATACATTA\n+>87_1\n+TTAATGACACACGGGAAAAACACCG\n+>88_1\n+TAGTTTCACTACTTTATTCTTTTTA\n+>89_1\n+AACAGGGAGATCAACAGCGTTGACA\n+>90_1\n+CGATATTTTCTCCTCTGACC\n+>91_1\n+ACCAGCACCTTCCGACTCAACGTCAAA\n+>92_1\n+AAGGAATTAAAGCAATAATTCTAA\n+>93_1\n+TACAAAACAAACAAATTACAATCTAAA\n+>94_1\n+CAATTTTTAATTCCTTTTTTCTTCTT\n+>95_1\n+TACAGACAACACATACGGACTTAA\n+>96_1\n+TCTGTATTTGACTTATTACTTTCTCC\n+>97_1\n+TGAGCTAGAACTGCACCCACTCCA\n+>98_1\n+CGCCGCAAGATGAATACTCTAATGA\n+>99_1\n+TATTTCTTTTTTAACTTCTTTTC\n+>100_1\n+TTACAATCTACTATTCTTTTATTA\n+>101_1\n+TTTAAACACTTCCTACATCAAATTTC\n+>102_1\n+TGTGTAATCTTTCTACTTCTTCTAC\n+>103_1\n+TCTATTCATACAAAACACTAATACCC\n+>104_1\n+TGGAGTAGCACAGTCGTCTGAAATC\n+>105_1\n+AAGCACGCCTTACCACAATTTATAA\n+>106_1\n+CTGGAAACTATTGATCAAATT\n+>107_1\n+TACACAGACTTACAAAACACATCCTTC\n+>108_1\n+TTCAAGTAGATTGCATTTTTTAATA\n+>109_1\n+TTATTACATCGTCCACATATAACAAAA\n+>110_1\n+CAAGGCTCAGAAGAACATCACCAAGACC\n+>111_1\n+TGAGGAAAACAGAAAAATGAGAGACA\n+>112_1\n+TCAAAAAGTAATAGGGATCGTTA\n+>113_1\n+TAACTTTAACT
TTTTTACT\n+>114_1\n+TATTCCGACAATACCTTCTTTAC\n+>115_1\n+TTTGTTTTTTACTATATTT\n+>116_1\n+TTCATTTTATTTTTAAATATCTTTTTT\n+>117_1\n+TACTCAATAGAACTCTACTCACTCATA\n+>118_1\n+TGAAAGGAAAAACAGGACACGGGA\n+>119_1\n+AAAATCGACTGCCGAAAACATTTTAA\n+>120_1\n+TAC'..b'>132_1\n+CAAATAACAAACTGAATAAACGAAA\n+>133_1\n+TGAGAATGACTTCTTCACGATCTCTT\n+>134_1\n+TCTTATTATCATTTTTTTATCCCTT\n+>135_1\n+TCAAATGCAAATTGGATTTATGA\n+>136_1\n+CCTTACTCAACATACTTAATCATACTTA\n+>137_1\n+TAGACTTTCTACTCATTATTAC\n+>138_1\n+TGAAACTGAAACTAACATACAAAATATT\n+>139_1\n+AAAACCCGGACAAACCATCGGAGGA\n+>140_1\n+TACAGACAACACATACGGACTTAAGT\n+>141_1\n+TATTTCAGCAACAGACTAAGACTAA\n+>142_1\n+AACTTTAAATTTTTAATAACCTT\n+>143_1\n+TATTTATAAATTTTTTCTTGAGAC\n+>144_1\n+TTAATATGTAATTTCATACCTCAC\n+>145_1\n+CACAGACTGAGGCAGAAAAAACAA\n+>146_1\n+TAAAGAAGAAGAATTGATTTTAAT\n+>147_1\n+TACTGAAAACGGGCGCATATCAGTGG\n+>148_1\n+TCAGTCTTTTTTTCTCTCCTA\n+>149_1\n+TATAATTTTATTTTATATTTTCTCT\n+>150_1\n+NATTCTTACTCCATTTCAATTTACT\n+>151_1\n+TTGTAAAACATTCTTTCTCCTGAC\n+>152_1\n+TAATTACCATTGCTAACTATCCA\n+>153_1\n+TTCTTCCTTTTATCCTCTCTTAA\n+>154_1\n+TCTAAACACCCACGAAAATCTCTTAC\n+>155_1\n+AAAAACACACAGACACAAGCAGCAAT\n+>156_1\n+CGGACGGTATATTTTTTAATATAA\n+>157_1\n+TATGGAGAAACAGCGATATAAGTCA\n+>158_1\n+TACAACTAACATCCTTTCTTCTTCC\n+>159_1\n+AACTCTCTAATTTAACTTTGTGC\n+>160_1\n+TCCTGAGGACGAGGGGCGTTTAGC\n+>161_1\n+TATTTCCAACCTTCAACCTCAAATAA\n+>162_1\n+TGGACGGAGAACTGATAAGGGC\n+>163_1\n+TTTAAGACTTATGAGCTTG\n+>164_1\n+TTAAAGACGCAACAACTAACATT\n+>165_1\n+TAGGAACTTCATACCGGTCTC\n+>166_1\n+CGATATTTTCTCCTCTTACC\n+>167_1\n+GAGGATTAAAAGAACGGTTTATAA\n+>168_1\n+GAATGATCGCACCACCACCTCAACGTT\n+>169_1\n+TTTTCTTTACCCATCTTTACTTTCCC\n+>170_1\n+AAGACAACAATGACATATAAGACG\n+>171_1\n+TAATAATTTAAATAAATATAAATTT\n+>172_1\n+TACTGAAACAAGGAAACACAAGC\n+>173_1\n+TCAGAAGAACAGAGAATTGATTTT\n+>174_1\n+CATACCTTAAATTATCTCTTTCTT\n+>175_1\n+TTCTTTTACTACATATTTTTTATTTTT\n+>176_1\n+AAAAAATATCTTTTTTAACTCGTGGCC\n+>177_1\n+TAACAAATAGAACGTTCTAATTTAAA\n+>178_1\n+TAGTTACCTTCATATCTCTCTTTA\n+>179_1\n+TAAAATTGTAATATTTAAATAATAT\n+>180_1\n+
AAAAGGAAAAACAGAAAAATTGGG\n+>181_1\n+AGATGTTGATCTAAACTCTCCCA\n+>182_1\n+TACCTCTTTATTAACCTCCACCTCTA\n+>183_1\n+TTTCCGACAAATACACCATCTTC\n+>184_1\n+ACAAATCATAAATTTTTTTTTACT\n+>185_1\n+GACGAAACGCAACAACAAAATGGACG\n+>186_1\n+TACAAATTTTTTTTTCTTTCTTAT\n+>187_1\n+TACACCTCTTTTTACTTTTTTATT\n+>188_1\n+TATGGATTATTTCAAAATTTTTTTTT\n+>189_1\n+TTCTAGCACAACACGCACACATATA\n+>190_1\n+TAACTACTTTTACATTAATACTAA\n+>191_1\n+TCTCATCTTACAATTTTTTAAAACTT\n+>192_1\n+TTCTTGGACTACACATTTTTTATTGTTTTA\n+>193_1\n+TACACACTCATCAACCAAAGTACGTA\n+>194_1\n+TACTATATACTTCTTCAAATCACA\n+>195_1\n+TCAGAGTTCTACAGGTCCTACGATT\n+>196_1\n+TGATTTACTTACATTCTTTTTTT\n+>197_1\n+CCATATATGACTGACTCATTTCAC\n+>198_1\n+GAAGAGGAGGAGGAGTTTGTAAG\n+>199_1\n+AAAGACAAAAGAAATACAGGCACT\n+>200_1\n+TACAAGACTAAAACAAACGTGAAGT\n+>201_1\n+TAACGGAGCACGAGAACGAAGTGG\n+>202_1\n+CTTCTTTTACTACATATTTTTTATTTTTTTA\n+>203_1\n+TAATAAGAAACTGTTCAAACAATCCAC\n+>204_1\n+TGAGCGGAGAACCAGAGTTGATGAGC\n+>205_1\n+TATTATTTTTTTATTCCATTCATAT\n+>206_1\n+TTTATTACTTAGTCATAATTCCAA\n+>207_1\n+TTTTATATTTCCTTATATCTTTACTA\n+>208_1\n+AACGGGGAATAAGGGTTCG\n+>209_1\n+AATCTACAATTTCCATTACGACTCC\n+>210_1\n+CCGACCGAGCAAATAAACACAGGAACG\n+>211_1\n+TCCACAACAACTCTATCTAAAGCATT\n+>212_1\n+TTCTTGATAACGCATCTTCTACAT\n+>213_1\n+TGCTTGGACTACATATGGTTGAGGG\n+>214_1\n+CAGATTCACTGATTTTCTTACGCC\n+>215_1\n+TTTGTTTTTCATTTTTTTATCTTT\n+>216_1\n+CTATATTTTCTCTCTTACC\n+>217_1\n+TAACCTTGCAGAACTATACGATTCAAA\n+>218_1\n+TAAGAAACTGAGCTAACGCAATGTACC\n+>219_1\n+TTCTTTTACTACATATTTTTTATTTTTTTA\n+>220_1\n+TATCTATCTTTGATCTTCTTTTCA\n+>221_1\n+TAATAAATTATTAAATAAAAAAAAAA\n+>222_1\n+TTTTTTATCAATTTTCACCATTCAT\n+>223_1\n+TATTTCACTTTATACTTCCTTAA\n+>224_1\n+TAGTTTTAAATATTTCTTTTTTTC\n+>225_1\n+TTCTTTTACTACATATTGTTTATTTTTTTA\n+>226_1\n+GAGAATAAATATTTCAATGGTCTATTG\n+>227_1\n+CGATATTTTCTCCTCTTACCT\n+>228_1\n+CACGACTTTATTCTTTTTATCTCA\n+>229_1\n+TAGTGGACTTTAAAAAAAAAAAAAAAAAA\n+>230_1\n+CATAATATAAACTTATCTT\n+>231_1\n+ATGAAATTCGAACAATACGTC\n+>232_1\n+AACAACTGCAAACATCTACCACA\n+>233_1\n+TAAAAATAATTGTCTTTAATTTCA\n+>234_1\n+CGCAACCAGCAGC
AACTCCTAGCAT\n+>235_1\n+ATTATTAATAAATTATTATAA\n+>236_1\n+CATTAATTCATCCATTTAAACTAA\n+>237_1\n+TCTTATTTTAATCTTCCAATTTC\n+>238_1\n+CTAGACAAGATGCTATAAATTTTAAA\n+>239_1\n+TGACCAAAGACAAACAAACAATAAATA\n+>240_1\n+TTTTTATCAATTTTCACCATTC\n+>241_1\n+TAAGTTTTTAATCATTTTTTTT\n+>242_1\n+TAATCAAAAAACTCTTCATTTTTA\n+>243_1\n+TACAAACGGAACTTTCGTCATAA\n+>244_1\n+TTTTCTTTTTTTCATTTTCTCTTTTA\n+>245_1\n+TAGCCTTTACTAGGCTTTTTCTAA\n+>246_1\n+TTAGTATTAATCTTCACTTAA\n+>247_1\n+TAAAATAAACCAAAACCCAAAAAT\n'
b
diff -r 000000000000 -r 951cb6b3979b test-data/output.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.tab Sun Jun 21 14:28:49 2015 -0400
b
@@ -0,0 +1,247 @@
+ACAGCAGGACGGTGATCA 2
+TGCTTGGACTACATATGGTTGAGTG 2
+TGGAATGTAAAGAAGTATGGAG 2
+AATTTATTTAATTTATATTCTAACTAA 1
+TTATAATCACGGCACCCTATACA 1
+TCCGAAAAATCGTAGGACCCGGGCA 1
+TCCCAACCCTCGAGCATCATTTTC 1
+TTGATTCTTCTTTTTCACAAAA 1
+AGAACAATTAAATAAAATAGCATA 1
+CCAGAAAACAATACAACATCCTCA 1
+TCTAGTCTGAGCGTAGTACCAGATTG 1
+TTTTTAACTCCCATCATTTTTCCTC 1
+GCAGATAGAAATCAATACAAAAATC 1
+AAGTGAAGAAGTAGTTTTT 1
+AATGTCACTTGAAGAATTCACGT 1
+TTTACCAGAGGAGTCGAGTTTTT 1
+GGTATCTTTATATTTTAATTTTCTT 1
+TTACTAGATCCACCCTCATTA 1
+TATATAAATCTTCAACATCAA 1
+GAAACCATTATCTTATCTTTATACA 1
+GGAGATTGTAGAACGAAAGGAAAAT 1
+ACTAAACTTTTCTTACCATATTTCTA 1
+TCAAGCCTTTTGAAGAACTGACCTAAA 1
+TAACATAAATTTTAATCATAAATTG 1
+TAGCGAGATGGACCAACGTGCTGT 1
+TACAAACGTAATTTTCGCATAACATC 1
+CGGAAAAGAATGTAGACCATTTAA 1
+GAAAGGAAGGGAAGAAAGCGAAAGGA 1
+CAAGAATACAAAAAATACTAATTA 1
+CATACCTACAAAAAAGCTTCTCTTAC 1
+TAAAATAAATAAGTCCGACGACAA 1
+AGGAATATGATGAAATAAAAAAAT 1
+TTTTTTTTTTGTTTTTATTTTTATCAT 1
+TTTTTAACTCATTTTACAATTAAAC 1
+CCGCGATCTGCTTATTTATAATCTT 1
+TAGGTACTTACCTTTTTTTTACACAA 1
+TCATTACACTTCTTACAAAAC 1
+TATTATACATAGAATAACAAATCTTT 1
+ATGTTATTTACTTTTTCCCCTTATA 1
+TCCGAAAACAAGGCCCGTCGCT 1
+TGCTTTTACTACATATTTTTTATTTTTTTA 1
+GAACAATTTTTCAATTTTTTACATTA 1
+AACATTTTATCAATTATACATTA 1
+TATCTGATCAACAATCTTTTCCCAT 1
+AATTGCAACAGAGACTGGAA 1
+TATTCAATCACTCCATTATATATAACA 1
+CAAACGGAACAAGACATCACCATC 1
+TCTTCTATATAATCCTTTATTATAA 1
+TGATGACGGGCAGCAGGGATTTTC 1
+TTTATTACAACCCTATCTTACCTCAA 1
+AACAGGAAAAACAGAAGGATTTCTA 1
+CGATATTTTCTCCTCGTACC 1
+AAATTGCAAAGATGGAAAATAAAACT 1
+TACACACTCATCAACCAAAGGACG 1
+TATATTGCCTCCCCATAATCCTT 1
+TACAAACGGAACTTTCTTCATAACTTC 1
+GGACGGAGAACTGATAATGGC 1
+TTCTTTGACTACATATTTTTTATT 1
+TCTTTTTTTTAATACTTATTTTCATT 1
+TACTTTTTTCTTAATTTTTTATTAAAC 1
+AATATAAAAATACAATCAACCATTGCA 1
+CAGTTTCACAAAAGATCTTTTAA 1
+GAAACAAACAACACATACCCTCTGGC 1
+AATGACACACTCTTCATCAAC 1
+AAGATGGAGTAGTTTTTT 1
+ACAACCTCAACTCATATTT 1
+TTAAACAATTTGGAATTAATT 1
+TACAAAAAATGCGAAAATTGACCCT 1
+GGACGGAGAACTGATAAGGGCA 1
+TTAAGTTTTAGACATAATCTATTACAA 1
+TTATTATCTATTTTAATTTTTCTTAA 1
+CACCGAACCGGGAAGGCGAACAAC 1
+TCCACCTATTTATCTTTTCTT 1
+TCGCCGTAAAGCCAGTCGTTCTCC 1
+TCAAAGAACAATGTAAAGCCGCGAC 1
+TGGACAAGAACCACGCGACGGGTGT 1
+CCCGAAAAGCCGAGGACGACTTA 1
+TTTTTTATCATTTTTCACCTAAAAAA 1
+TAGAACTCGAACCAGAGCTCC 1
+TCTATATTATTTTTATCAATTTTCACC 1
+TCTATTTCTTTATTTTTTTTATTAT 1
+TTTGATACCTTTATACCATACCTATT 1
+ATAAAGCTAGATTACCAAAGCAT 1
+GCCAACGACCATACCACGA 1
+CGGCACATGTTGAATTACACTCA 1
+TACTATTTTATTATACATACATACATTA 1
+TTAATGACACACGGGAAAAACACCG 1
+TAGTTTCACTACTTTATTCTTTTTA 1
+AACAGGGAGATCAACAGCGTTGACA 1
+CGATATTTTCTCCTCTGACC 1
+ACCAGCACCTTCCGACTCAACGTCAAA 1
+AAGGAATTAAAGCAATAATTCTAA 1
+TACAAAACAAACAAATTACAATCTAAA 1
+CAATTTTTAATTCCTTTTTTCTTCTT 1
+TACAGACAACACATACGGACTTAA 1
+TCTGTATTTGACTTATTACTTTCTCC 1
+TGAGCTAGAACTGCACCCACTCCA 1
+CGCCGCAAGATGAATACTCTAATGA 1
+TATTTCTTTTTTAACTTCTTTTC 1
+TTACAATCTACTATTCTTTTATTA 1
+TTTAAACACTTCCTACATCAAATTTC 1
+TGTGTAATCTTTCTACTTCTTCTAC 1
+TCTATTCATACAAAACACTAATACCC 1
+TGGAGTAGCACAGTCGTCTGAAATC 1
+AAGCACGCCTTACCACAATTTATAA 1
+CTGGAAACTATTGATCAAATT 1
+TACACAGACTTACAAAACACATCCTTC 1
+TTCAAGTAGATTGCATTTTTTAATA 1
+TTATTACATCGTCCACATATAACAAAA 1
+CAAGGCTCAGAAGAACATCACCAAGACC 1
+TGAGGAAAACAGAAAAATGAGAGACA 1
+TCAAAAAGTAATAGGGATCGTTA 1
+TAACTTTAACTTTTTTACT 1
+TATTCCGACAATACCTTCTTTAC 1
+TTTGTTTTTTACTATATTT 1
+TTCATTTTATTTTTAAATATCTTTTTT 1
+TACTCAATAGAACTCTACTCACTCATA 1
+TGAAAGGAAAAACAGGACACGGGA 1
+AAAATCGACTGCCGAAAACATTTTAA 1
+TACAGAGAAATATACAACACTCACC 1
+TCAACTGGCAAGAATTTTTGAAAATT 1
+GAGAACTTTTAATCATTTTAC 1
+TATTATCATCTCGTTCTTCCTTCTC 1
+TTTTCATTTCTTCTTCAAATCCTTT 1
+TAGTCATACATACCTAATTATACATA 1
+ATTTTACTTCATCATTTTC 1
+TCTCTTTTATTTTTATCTTTCCTT 1
+GCCGGGGCGTGAGATGTCTGCATTA 1
+AGGATTTTTAAGCCCATATGTTTCC 1
+CAAGATATGAACAAAGCAAAGACAC 1
+CAACACATGACGCGACAATTCTTG 1
+CAAATAACAAACTGAATAAACGAAA 1
+TGAGAATGACTTCTTCACGATCTCTT 1
+TCTTATTATCATTTTTTTATCCCTT 1
+TCAAATGCAAATTGGATTTATGA 1
+CCTTACTCAACATACTTAATCATACTTA 1
+TAGACTTTCTACTCATTATTAC 1
+TGAAACTGAAACTAACATACAAAATATT 1
+AAAACCCGGACAAACCATCGGAGGA 1
+TACAGACAACACATACGGACTTAAGT 1
+TATTTCAGCAACAGACTAAGACTAA 1
+AACTTTAAATTTTTAATAACCTT 1
+TATTTATAAATTTTTTCTTGAGAC 1
+TTAATATGTAATTTCATACCTCAC 1
+CACAGACTGAGGCAGAAAAAACAA 1
+TAAAGAAGAAGAATTGATTTTAAT 1
+TACTGAAAACGGGCGCATATCAGTGG 1
+TCAGTCTTTTTTTCTCTCCTA 1
+TATAATTTTATTTTATATTTTCTCT 1
+NATTCTTACTCCATTTCAATTTACT 1
+TTGTAAAACATTCTTTCTCCTGAC 1
+TAATTACCATTGCTAACTATCCA 1
+TTCTTCCTTTTATCCTCTCTTAA 1
+TCTAAACACCCACGAAAATCTCTTAC 1
+AAAAACACACAGACACAAGCAGCAAT 1
+CGGACGGTATATTTTTTAATATAA 1
+TATGGAGAAACAGCGATATAAGTCA 1
+TACAACTAACATCCTTTCTTCTTCC 1
+AACTCTCTAATTTAACTTTGTGC 1
+TCCTGAGGACGAGGGGCGTTTAGC 1
+TATTTCCAACCTTCAACCTCAAATAA 1
+TGGACGGAGAACTGATAAGGGC 1
+TTTAAGACTTATGAGCTTG 1
+TTAAAGACGCAACAACTAACATT 1
+TAGGAACTTCATACCGGTCTC 1
+CGATATTTTCTCCTCTTACC 1
+GAGGATTAAAAGAACGGTTTATAA 1
+GAATGATCGCACCACCACCTCAACGTT 1
+TTTTCTTTACCCATCTTTACTTTCCC 1
+AAGACAACAATGACATATAAGACG 1
+TAATAATTTAAATAAATATAAATTT 1
+TACTGAAACAAGGAAACACAAGC 1
+TCAGAAGAACAGAGAATTGATTTT 1
+CATACCTTAAATTATCTCTTTCTT 1
+TTCTTTTACTACATATTTTTTATTTTT 1
+AAAAAATATCTTTTTTAACTCGTGGCC 1
+TAACAAATAGAACGTTCTAATTTAAA 1
+TAGTTACCTTCATATCTCTCTTTA 1
+TAAAATTGTAATATTTAAATAATAT 1
+AAAAGGAAAAACAGAAAAATTGGG 1
+AGATGTTGATCTAAACTCTCCCA 1
+TACCTCTTTATTAACCTCCACCTCTA 1
+TTTCCGACAAATACACCATCTTC 1
+ACAAATCATAAATTTTTTTTTACT 1
+GACGAAACGCAACAACAAAATGGACG 1
+TACAAATTTTTTTTTCTTTCTTAT 1
+TACACCTCTTTTTACTTTTTTATT 1
+TATGGATTATTTCAAAATTTTTTTTT 1
+TTCTAGCACAACACGCACACATATA 1
+TAACTACTTTTACATTAATACTAA 1
+TCTCATCTTACAATTTTTTAAAACTT 1
+TTCTTGGACTACACATTTTTTATTGTTTTA 1
+TACACACTCATCAACCAAAGTACGTA 1
+TACTATATACTTCTTCAAATCACA 1
+TCAGAGTTCTACAGGTCCTACGATT 1
+TGATTTACTTACATTCTTTTTTT 1
+CCATATATGACTGACTCATTTCAC 1
+GAAGAGGAGGAGGAGTTTGTAAG 1
+AAAGACAAAAGAAATACAGGCACT 1
+TACAAGACTAAAACAAACGTGAAGT 1
+TAACGGAGCACGAGAACGAAGTGG 1
+CTTCTTTTACTACATATTTTTTATTTTTTTA 1
+TAATAAGAAACTGTTCAAACAATCCAC 1
+TGAGCGGAGAACCAGAGTTGATGAGC 1
+TATTATTTTTTTATTCCATTCATAT 1
+TTTATTACTTAGTCATAATTCCAA 1
+TTTTATATTTCCTTATATCTTTACTA 1
+AACGGGGAATAAGGGTTCG 1
+AATCTACAATTTCCATTACGACTCC 1
+CCGACCGAGCAAATAAACACAGGAACG 1
+TCCACAACAACTCTATCTAAAGCATT 1
+TTCTTGATAACGCATCTTCTACAT 1
+TGCTTGGACTACATATGGTTGAGGG 1
+CAGATTCACTGATTTTCTTACGCC 1
+TTTGTTTTTCATTTTTTTATCTTT 1
+CTATATTTTCTCTCTTACC 1
+TAACCTTGCAGAACTATACGATTCAAA 1
+TAAGAAACTGAGCTAACGCAATGTACC 1
+TTCTTTTACTACATATTTTTTATTTTTTTA 1
+TATCTATCTTTGATCTTCTTTTCA 1
+TAATAAATTATTAAATAAAAAAAAAA 1
+TTTTTTATCAATTTTCACCATTCAT 1
+TATTTCACTTTATACTTCCTTAA 1
+TAGTTTTAAATATTTCTTTTTTTC 1
+TTCTTTTACTACATATTGTTTATTTTTTTA 1
+GAGAATAAATATTTCAATGGTCTATTG 1
+CGATATTTTCTCCTCTTACCT 1
+CACGACTTTATTCTTTTTATCTCA 1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA 1
+CATAATATAAACTTATCTT 1
+ATGAAATTCGAACAATACGTC 1
+AACAACTGCAAACATCTACCACA 1
+TAAAAATAATTGTCTTTAATTTCA 1
+CGCAACCAGCAGCAACTCCTAGCAT 1
+ATTATTAATAAATTATTATAA 1
+CATTAATTCATCCATTTAAACTAA 1
+TCTTATTTTAATCTTCCAATTTC 1
+CTAGACAAGATGCTATAAATTTTAAA 1
+TGACCAAAGACAAACAAACAATAAATA 1
+TTTTTATCAATTTTCACCATTC 1
+TAAGTTTTTAATCATTTTTTTT 1
+TAATCAAAAAACTCTTCATTTTTA 1
+TACAAACGGAACTTTCGTCATAA 1
+TTTTCTTTTTTTCATTTTCTCTTTTA 1
+TAGCCTTTACTAGGCTTTTTCTAA 1
+TTAGTATTAATCTTCACTTAA 1
+TAAAATAAACCAAAACCCAAAAAT 1