Repository 'sequence_format_converter'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/sequence_format_converter

Changeset 0:a8aacccd79a3 (2017-09-04)
Next changeset 1:9ce7ccd468aa (2018-02-16)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
added:
sequence_format_converter.py
sequence_format_converter.xml
test-data/fastqTofasta.sorted.fa
test-data/fastqTofastaw.sorted.faw
test-data/fastqTotabular.sorted.tab
test-data/input.fa
test-data/input.fastqsanger
test-data/input.sorted.fa
test-data/output.faw
test-data/output.sorted.faw
test-data/output.sorted.tab
test-data/output.tab
test-data/sort.py
b
diff -r 000000000000 -r a8aacccd79a3 sequence_format_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sequence_format_converter.py Mon Sep 04 07:13:28 2017 -0400
[
b'@@ -0,0 +1,208 @@\n+#!/usr/bin/env python\n+#\n+import argparse\n+import logging\n+import sys\n+from collections import defaultdict\n+\n+\n+def Parser():\n+    the_parser = argparse.ArgumentParser()\n+    the_parser.add_argument(\n+        \'--input\', action="store", type=str,\n+        help="input file, accepted format: fastq, fasta, fasta_weigthed, \\\n+            tabular")\n+    the_parser.add_argument(\n+        \'--output\', action="store", type=str, help="output converted file")\n+    the_parser.add_argument(\n+        \'--format\', action="store", type=str,\n+        help="select output format (fasta, fasta_weigthed, tabular")\n+    args = the_parser.parse_args()\n+    return args\n+\n+\n+class Sequencing:\n+\n+    def __init__(self, input, output, format):\n+        self.input = input\n+        self.output = open(output, \'w\')\n+        self.outputformat = format\n+        self.inputformat = self.detectformat(self.input)\n+        self.seqdic = defaultdict(int)\n+        self.read(self.input, self.inputformat)\n+        self.write(self.output, self.outputformat)\n+\n+    def detectformat(self, input):\n+        input = open(input, \'r\')\n+        block = []\n+        reference = [\'A\', \'T\', \'G\', \'C\', \'N\']\n+        format = \'\'\n+        try:\n+            for l in range(4):\n+                block.append(input.readline()[:-1])\n+        except:\n+            logging.info("File hasn\'t at leat four lines !")\n+            sys.exit("File hasn\'t at leat four lines !")\n+        input.close()\n+        line1, line2, line3, line4 = block[0], block[1], block[2], block[3]\n+        if line1[0] == \'>\' and line3[0] == \'>\':\n+            logging.info("\'>\' detected in lines 1 and 3")\n+            sequence = \'\'.join([line2, line4]).upper()\n+            nucleotides = set([base for base in sequence])\n+            for nucleotide in nucleotides:\n+                if nucleotide not in reference:\n+                    logging.info("But other 
nucleotides that A, T, G, C or N")\n+                    sys.exit(\'input appears to be Fasta but with \\\n+                              unexpected nucleotides\')\n+            format = \'fasta\'\n+        elif line1[0] == \'>\' and line4[0] == \'>\':\n+            logging.info("\'>\' detected in lines 1 and 4")\n+            sequence = \'\'.join([line2, line3]).upper()\n+            nucleotides = set([base for base in sequence])\n+            for nucleotide in nucleotides:\n+                if nucleotide not in reference:\n+                    logging.info("But other nucleotides that A, T, G, C or N")\n+                    sys.exit(\'input appears to be Fasta but with \\\n+                              unexpected nucleotides\')\n+            format = \'fasta\'\n+        elif line1[0] == \'>\':\n+            logging.info("\'>\' detected in lines 1")\n+            sequence = \'\'.join([line2, line3, line4]).upper()\n+            nucleotides = set([base for base in sequence])\n+            for nucleotide in nucleotides:\n+                if nucleotide not in reference:\n+                    logging.info("But other nucleotides that A, T, G, C or N")\n+                    sys.exit(\'input appears to be Fasta but with \\\n+                              unexpected nucleotides\')\n+            format = \'fasta\'\n+        if format == \'fasta\':\n+            try:\n+                for line in block:\n+                    if line[0] == \'>\':\n+                        int(line.split(\'_\')[-1])\n+                return \'fastaw\'\n+            except:\n+                return \'fasta\'\n+        if line1[0] == \'@\' and line3[0] == \'+\':\n+            nucleotides = set([base for base in line2])\n+            for nucleotide in nucleotides:\n+                if nucleotide not in reference:\n+                    logging.info("Looks like fastq input but other nucleotides \\\n+                                 that A, T, G, C or N")\n+                    sys.exit("input 
appears to be Fastq \\\n+                             but with unexpected nucleotides")\n+            return \'fastq\'\n+        for line in block:\n+            if len(line.split(\'\\t\')) !'..b'"No valid format detected")\n+                sys.exit(\'No valid format detected\')\n+            for nucleotide in line.split(\'\\t\')[0]:\n+                if nucleotide not in reference:\n+                    logging.info("No valid format detected")\n+                    sys.exit(\'No valid format detected\')\n+        return \'tabular\'\n+\n+    def read(self, input, format):\n+        input = open(input, \'r\')\n+        if format == \'fasta\':\n+            try:\n+                self.readfasta(input)\n+            except:\n+                logging.info("an error occured while reading fasta")\n+        elif format == \'fastaw\':\n+            try:\n+                self.readfastaw(input)\n+            except:\n+                logging.info("an error occured while reading fastaw")\n+        elif format == \'tabular\':\n+            try:\n+                self.readtabular(input)\n+            except:\n+                logging.info("an error occured while reading tabular")\n+        elif format == \'fastq\':\n+            try:\n+                self.readfastq(input)\n+            except:\n+                logging.info("an error occured while reading fastq")\n+        else:\n+            logging.info("no valid format detected")\n+            sys.exit(\'No valid format detected\')\n+\n+    def readfastaw(self, input):\n+        for line in input:\n+            if line[0] == ">":\n+                weigth = int(line[:-1].split("_")[-1])\n+            else:\n+                self.seqdic[line[:-1]] += weigth\n+        input.close()\n+\n+    def readfasta(self, input):\n+        \'\'\' this method is able to read multi-line fasta sequence\'\'\'\n+        for line in input:\n+            if line[0] == ">":\n+                try:\n+                    #  to dump the sequence of 
the previous item\n+                    #  try because of first missing stringlist variable\n+                    self.seqdic["".join(stringlist)] += 1\n+                except NameError:\n+                    pass\n+                stringlist = []\n+            else:\n+                try:\n+                    stringlist.append(line[:-1])\n+                except UnboundLocalError:\n+                    # if file went through filter and contains only empty lines\n+                    logging.info("first line is empty.")\n+        try:\n+            self.seqdic["".join(stringlist)] += 1  # for the last sequence\n+        except NameError:\n+            logging.info("input file has not fasta sequences.")\n+        input.close()\n+\n+    def readtabular(self, input):\n+        for line in input:\n+            fields = line[:-1].split(\'\\t\')\n+            self.seqdic[fields[0]] += int(fields[1])\n+        input.close()\n+\n+    def readfastq(self, input):\n+        linecount = 0\n+        for line in input:\n+            linecount += 1\n+            if linecount % 4 == 2:\n+                self.seqdic[line[:-1]] += 1\n+        input.close()\n+\n+    def write(self, output, format=\'fasta\'):\n+        if format == \'fasta\':\n+            headercount = 0\n+            for seq in sorted(self.seqdic, key=self.seqdic.get, reverse=True):\n+                for i in range(self.seqdic[seq]):\n+                    headercount += 1\n+                    output.write(\'>%s\\n%s\\n\' % (headercount, seq))\n+        elif format == \'fastaw\':\n+            headercount = 0\n+            for seq in sorted(self.seqdic, key=self.seqdic.get, reverse=True):\n+                headercount += 1\n+                output.write(\'>%s_%s\\n%s\\n\' % (headercount,\n+                                               self.seqdic[seq], seq))\n+        elif format == \'tabular\':\n+            for seq in sorted(self.seqdic, key=self.seqdic.get, reverse=True):\n+                
output.write(\'%s\\t%s\\n\' % (seq, self.seqdic[seq]))\n+        output.close()\n+\n+\n+def main(input, output, format):\n+    Sequencing(input, output, format)\n+\n+\n+if __name__ == "__main__":\n+    args = Parser()\n+    log = logging.getLogger(__name__)\n+    logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n+    main(args.input, args.output, args.format)\n'
b
diff -r 000000000000 -r a8aacccd79a3 sequence_format_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sequence_format_converter.xml Mon Sep 04 07:13:28 2017 -0400
[
@@ -0,0 +1,169 @@
+<tool id="sequence_format_converter" name="sequence_format_converter" version="2.0.0">
+  <description></description>
+  <command><![CDATA[
+      python '$__tool_directory__'/sequence_format_converter.py
+      --input '$input'
+      --output '$output'
+      --format '$output_format'
+  ]]></command>
+
+<inputs>
+    <param name="input" type="data" format="fasta, fastq, tabular" label="file to convert to tabular (input format is automatically detected)"/>
+    <param name="output_format" type="select" label="conversion options">
+        <option value="tabular" selected="true">tabular</option>
+        <option value="fasta">Fasta</option>
+        <option value="fastaw">Weighted fasta</option>
+    </param>
+   </inputs>
+
+ <outputs>
+   <data format="fasta" name="output" label="${output_format} conversion of ${input.name}">
+     <change_format>
+        <when input="output_format" value="tabular" format="tabular"/>
+     </change_format>
+   </data>
+</outputs>
+
+    <tests>
+        <test>
+            <!-- conversion fasta to tabular -->
+            <param name="output_format" value="tabular" />
+            <param ftype="fasta" name="input" value="input.fa" />
+            <output file="output.tab" name="output" />
+        </test>
+        <test>
+            <!-- conversion tabular to fasta -->
+            <param name="output_format" value="fasta" />
+            <param ftype="tabular" name="input" value="output.tab" />
+            <output file="input.sorted.fa" name="output" sort="True"/>
+        </test>
+        <test>
+            <!-- conversion fastaw to tabular -->
+            <param name="output_format" value="tabular" />
+            <param ftype="fasta" name="input" value="output.faw" />
+            <output file="output.sorted.tab" name="output" sort="True"/>
+        </test>
+        <test>
+            <!-- conversion tabular to fastaw -->
+            <param name="output_format" value="fastaw" />
+            <param ftype="tabular" name="input" value="output.tab" />
+            <output file="output.sorted.faw" name="output" sort="True" />
+        </test>
+         <test>
+            <!-- conversion fasta to fastaw -->
+            <param name="output_format" value="fastaw" />
+            <param ftype="fasta" name="input" value="input.fa" />
+            <output file="output.sorted.faw" name="output" sort="True" />
+        </test>
+        <test>
+            <!-- conversion fastaw to fasta -->
+            <param name="output_format" value="fasta" />
+            <param ftype="fasta" name="input" value="output.faw" />
+            <output file="input.sorted.fa" name="output" sort="True" />
+        </test>
+        <test>
+            <!-- conversion fastq to tabular -->
+            <param name="output_format" value="tabular" />
+            <param ftype="fastq" name="input" value="input.fastqsanger" />
+            <output file="fastqTotabular.sorted.tab" name="output" sort="True" />
+        </test>
+         <test>
+            <!-- conversion fastq to fasta -->
+            <param name="output_format" value="fasta" />
+            <param ftype="fasta" name="input" value="input.fastqsanger" />
+            <output file="fastqTofasta.sorted.fa" name="output" sort="True" />
+        </test>
+        <test>
+            <!-- conversion fastq to fastaw -->
+            <param name="output_format" value="fastaw" />
+            <param ftype="fasta" name="input" value="input.fastqsanger" />
+            <output file="fastqTofastaw.sorted.faw" name="output" sort="True" />
+        </test>
+   </tests>
+
+
+<help>
+
+**What it does**
+
+The tool performs all pairwise conversions between sequence formats fasta, fastaw and tabular.
+
+The tool is also able to convert fastq format into any of the formats fasta, fastaw and tabular.
+
+The format of the input is automatically detected by the tool.
+
+**Formats**
+
+*Fasta*
+
+>id1
+
+ATGCATGACCAGATAGGAC
+
+>id2
+
+ATGCATGACCAGATAGGAC
+
+Note that the tool handles fasta sequences spanning multiple lines.
+
+
+----------
+
+*Fastaw*
+
+Allows reducing the size of a fasta file of sequence reads by storing each distinct sequence once, together with its read count:
+
+>id1_n1
+
+ATGCATGACCAGATAGGAC
+
+>id2_n2
+
+ATGCATGACCAGATAGGAC
+
+etc...
+
+Here, n1 and n2 are integers that indicate the number of reads of each sequence found in the sequencing dataset.
+
+Note that if two fastaw files are merged (e.g. by concatenation), the read counts are no longer correct, because the same sequence may then appear in several entries.
+
+These values can simply be recomputed by submitting the merged file to a fastaw conversion with the *sequence_format_converter* tool.
+
+
+----------
+
+*Tabular*
+
+A tabular version of fastaw without fasta headers:
+
+column 1               column2
+
+ATGCATGACCAGATAGGAC    n1
+
+ATGCATGACCAGATAGGAC    n2
+
+
+----------
+
+*Fastq*
+
+@HWI-1
+
+ATGCATGACCAGATAGGAC
+
+\+
+
+BBBA;ACB9ABCBABB@@/
+
+@HWI-2
+
+ATGCATGACCAGATAGGAC
+
+\+
+
+?03@?82?B>C@B>@CC?0
+
+
+</help>
+
+</tool>
b
diff -r 000000000000 -r a8aacccd79a3 test-data/fastqTofasta.sorted.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTofasta.sorted.fa Mon Sep 04 07:13:28 2017 -0400
b
@@ -0,0 +1,500 @@
+>1
+>10
+>100
+>101
+>102
+>103
+>104
+>105
+>106
+>107
+>108
+>109
+>11
+>110
+>111
+>112
+>113
+>114
+>115
+>116
+>117
+>118
+>119
+>12
+>120
+>121
+>122
+>123
+>124
+>125
+>126
+>127
+>128
+>129
+>13
+>130
+>131
+>132
+>133
+>134
+>135
+>136
+>137
+>138
+>139
+>14
+>140
+>141
+>142
+>143
+>144
+>145
+>146
+>147
+>148
+>149
+>15
+>150
+>151
+>152
+>153
+>154
+>155
+>156
+>157
+>158
+>159
+>16
+>160
+>161
+>162
+>163
+>164
+>165
+>166
+>167
+>168
+>169
+>17
+>170
+>171
+>172
+>173
+>174
+>175
+>176
+>177
+>178
+>179
+>18
+>180
+>181
+>182
+>183
+>184
+>185
+>186
+>187
+>188
+>189
+>19
+>190
+>191
+>192
+>193
+>194
+>195
+>196
+>197
+>198
+>199
+>2
+>20
+>200
+>201
+>202
+>203
+>204
+>205
+>206
+>207
+>208
+>209
+>21
+>210
+>211
+>212
+>213
+>214
+>215
+>216
+>217
+>218
+>219
+>22
+>220
+>221
+>222
+>223
+>224
+>225
+>226
+>227
+>228
+>229
+>23
+>230
+>231
+>232
+>233
+>234
+>235
+>236
+>237
+>238
+>239
+>24
+>240
+>241
+>242
+>243
+>244
+>245
+>246
+>247
+>248
+>249
+>25
+>250
+>26
+>27
+>28
+>29
+>3
+>30
+>31
+>32
+>33
+>34
+>35
+>36
+>37
+>38
+>39
+>4
+>40
+>41
+>42
+>43
+>44
+>45
+>46
+>47
+>48
+>49
+>5
+>50
+>51
+>52
+>53
+>54
+>55
+>56
+>57
+>58
+>59
+>6
+>60
+>61
+>62
+>63
+>64
+>65
+>66
+>67
+>68
+>69
+>7
+>70
+>71
+>72
+>73
+>74
+>75
+>76
+>77
+>78
+>79
+>8
+>80
+>81
+>82
+>83
+>84
+>85
+>86
+>87
+>88
+>89
+>9
+>90
+>91
+>92
+>93
+>94
+>95
+>96
+>97
+>98
+>99
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGGT
+AATGGCACTGGAAGAATTCACGTG
+AATTGCACTAGTCCCGGCCTG
+ACTGAATTCTCGTGGGTCTGCAT
+AGGACGGGAAGGTGTCAACG
+ATAAAGCTAGATTACCAAAGCAT
+CAAATTCGGTTCTAGAGAGGTT
+CGAATAGCGTTGTGACTGA
+CGGACGGTATATGGGTTAATATT
+CGGATGATGGTTCACAACGACC
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCAA
+CGGCACATGTTGAAGTACACTCAA
+CTGACTAGATCCACACTCATTA
+GGACGGAGAACTGATAAGGGCTCGG
+GGCGAACATGGATCTAGTGCACG
+GGGAGCGAGACGGGGACTCAC
+GGGAGCGAGACGGGGACTCACT
+GGGAGCGAGACGGGGACTCACT
+TAAAGCTAGATTACCAAAGCAT
+TAAAGCTAGATTACCAAAGCAT
+TAAAGCTAGATTACCAAAGCAT
+TAAGGAAATAGTAGCCGTGAT
+TAAGGAAATAGTAGCCGTGAT
+TAAGGAAATAGTAGCCGTGAT
+TAGCACCACATGATTCGGCT
+TAGGAACTTCATACCGTGCTCT
+TAGGAACTTCATACCGTGCTCT
+TATCACAGCCAGCTTTGAGGAG
+TATCACAGCCATTTTGACGAGTT
+TATCACAGCCATTTTGACGAGTT
+TATTGCACTTGAGACGGCCTTA
+TCAGGTACCTGAAGTAGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCGTTAT
+TCTTTGGTATTCTAGCTGTAGA
+TGACTAGATCCACACTCATTA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAC
+TGACTAGATTCACACTCATTA
+TGGAATGTAAAGAAGAATGGAG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGT
+TGGAATGTAAAGAATTATGGAG
+TGGAATGTAAAGGAGTATGGAG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAAA
+TGGACGGAGAACTGATAAGGGCAT
+TGGACGGAGAACTGATAAGGGCT
+TGGACGGAGAACTGATAAGGGCTT
+TGGACGGAGAACTGATAAGGGT
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAC
b
diff -r 000000000000 -r a8aacccd79a3 test-data/fastqTofastaw.sorted.faw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTofastaw.sorted.faw Mon Sep 04 07:13:28 2017 -0400
b
@@ -0,0 +1,110 @@
+>10_3
+>11_3
+>12_2
+>13_2
+>14_2
+>15_2
+>16_2
+>17_2
+>18_2
+>19_2
+>1_86
+>20_2
+>21_2
+>22_2
+>23_1
+>24_1
+>25_1
+>26_1
+>27_1
+>28_1
+>29_1
+>2_43
+>30_1
+>31_1
+>32_1
+>33_1
+>34_1
+>35_1
+>36_1
+>37_1
+>38_1
+>39_1
+>3_16
+>40_1
+>41_1
+>42_1
+>43_1
+>44_1
+>45_1
+>46_1
+>47_1
+>48_1
+>49_1
+>4_10
+>50_1
+>51_1
+>52_1
+>53_1
+>54_1
+>55_1
+>5_9
+>6_8
+>7_7
+>8_6
+>9_4
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGGT
+AATGGCACTGGAAGAATTCACGTG
+AATTGCACTAGTCCCGGCCTG
+ACTGAATTCTCGTGGGTCTGCAT
+AGGACGGGAAGGTGTCAACG
+ATAAAGCTAGATTACCAAAGCAT
+CAAATTCGGTTCTAGAGAGGTT
+CGAATAGCGTTGTGACTGA
+CGGACGGTATATGGGTTAATATT
+CGGATGATGGTTCACAACGACC
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCAA
+CTGACTAGATCCACACTCATTA
+GGACGGAGAACTGATAAGGGCTCGG
+GGCGAACATGGATCTAGTGCACG
+GGGAGCGAGACGGGGACTCAC
+GGGAGCGAGACGGGGACTCACT
+TAAAGCTAGATTACCAAAGCAT
+TAAGGAAATAGTAGCCGTGAT
+TAGCACCACATGATTCGGCT
+TAGGAACTTCATACCGTGCTCT
+TATCACAGCCAGCTTTGAGGAG
+TATCACAGCCATTTTGACGAGTT
+TATTGCACTTGAGACGGCCTTA
+TCAGGTACCTGAAGTAGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCGTTAT
+TCTTTGGTATTCTAGCTGTAGA
+TGACTAGATCCACACTCATTA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAC
+TGACTAGATTCACACTCATTA
+TGGAATGTAAAGAAGAATGGAG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGT
+TGGAATGTAAAGAATTATGGAG
+TGGAATGTAAAGGAGTATGGAG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAAA
+TGGACGGAGAACTGATAAGGGCAT
+TGGACGGAGAACTGATAAGGGCT
+TGGACGGAGAACTGATAAGGGCTT
+TGGACGGAGAACTGATAAGGGT
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAC
b
diff -r 000000000000 -r a8aacccd79a3 test-data/fastqTotabular.sorted.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTotabular.sorted.tab Mon Sep 04 07:13:28 2017 -0400
b
@@ -0,0 +1,55 @@
+AATGGCACTGGAAGAATTCACGG 2
+AATGGCACTGGAAGAATTCACGGG 4
+AATGGCACTGGAAGAATTCACGGGT 1
+AATGGCACTGGAAGAATTCACGTG 1
+AATTGCACTAGTCCCGGCCTG 1
+ACTGAATTCTCGTGGGTCTGCAT 1
+AGGACGGGAAGGTGTCAACG 1
+ATAAAGCTAGATTACCAAAGCAT 1
+CAAATTCGGTTCTAGAGAGGTT 1
+CGAATAGCGTTGTGACTGA 1
+CGGACGGTATATGGGTTAATATT 1
+CGGATGATGGTTCACAACGACC 1
+CGGCACATGTTGAAGTACACTCA 2
+CGGCACATGTTGAAGTACACTCAA 2
+CTGACTAGATCCACACTCATTA 1
+GGACGGAGAACTGATAAGGGCTCGG 1
+GGCGAACATGGATCTAGTGCACG 1
+GGGAGCGAGACGGGGACTCAC 1
+GGGAGCGAGACGGGGACTCACT 2
+TAAAGCTAGATTACCAAAGCAT 3
+TAAGGAAATAGTAGCCGTGAT 3
+TAGCACCACATGATTCGGCT 1
+TAGGAACTTCATACCGTGCTCT 2
+TATCACAGCCAGCTTTGAGGAG 1
+TATCACAGCCATTTTGACGAGTT 2
+TATTGCACTTGAGACGGCCTTA 1
+TCAGGTACCTGAAGTAGCG 1
+TCAGGTACCTGAAGTAGCGCGCG 10
+TCAGGTACCTGAAGTAGCGCGCGTTAT 1
+TCTTTGGTATTCTAGCTGTAGA 1
+TGACTAGATCCACACTCATTA 1
+TGACTAGATCCACACTCATTAA 2
+TGACTAGATCCACACTCATTAC 1
+TGACTAGATTCACACTCATTA 1
+TGGAATGTAAAGAAGAATGGAG 1
+TGGAATGTAAAGAAGTATGG 2
+TGGAATGTAAAGAAGTATGGA 7
+TGGAATGTAAAGAAGTATGGAG 43
+TGGAATGTAAAGAAGTATGGAGA 2
+TGGAATGTAAAGAAGTATGGAGT 1
+TGGAATGTAAAGAATTATGGAG 1
+TGGAATGTAAAGGAGTATGGAG 1
+TGGACGGAGAACTGATAAGG 2
+TGGACGGAGAACTGATAAGGG 6
+TGGACGGAGAACTGATAAGGGC 86
+TGGACGGAGAACTGATAAGGGCA 8
+TGGACGGAGAACTGATAAGGGCAA 9
+TGGACGGAGAACTGATAAGGGCAAA 1
+TGGACGGAGAACTGATAAGGGCAT 1
+TGGACGGAGAACTGATAAGGGCT 1
+TGGACGGAGAACTGATAAGGGCTT 1
+TGGACGGAGAACTGATAAGGGT 1
+TGTGATGTGACGTAGTGGAA 16
+TGTGATGTGACGTAGTGGAAA 2
+TGTGATGTGACGTAGTGGAAC 1
b
diff -r 000000000000 -r a8aacccd79a3 test-data/input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa Mon Sep 04 07:13:28 2017 -0400
b
@@ -0,0 +1,500 @@
+>1
+TAGTTACCTTCATATCTCTCTTTA
+>2
+TCTATTCATACAAAACACTAATACCC
+>3
+ACAACCTCAACTCATATTT
+>4
+TATAATTTTATTTTATATTTTCTCT
+>5
+TCTTCTATATAATCCTTTATTATAA
+>6
+TAAAATAAACCAAAACCCAAAAAT
+>7
+AATCTACAATTTCCATTACGACTCC
+>8
+TTTCCGACAAATACACCATCTTC
+>9
+CAGATTCACTGATTTTCTTACGCC
+>10
+CAAGAATACAAAAAATACTAATTA
+>11
+AACTCTCTAATTTAACTTTGTGC
+>12
+AAAAACACACAGACACAAGCAGCAAT
+>13
+TTACTAGATCCACCCTCATTA
+>14
+ATGTTATTTACTTTTTCCCCTTATA
+>15
+CGATATTTTCTCCTCTTACC
+>16
+TACAGAGAAATATACAACACTCACC
+>17
+ATGAAATTCGAACAATACGTC
+>18
+GAGAATAAATATTTCAATGGTCTATTG
+>19
+TGCTTTTACTACATATTTTTTATTTTTTTA
+>20
+CATACCTTAAATTATCTCTTTCTT
+>21
+TTTGTTTTTCATTTTTTTATCTTT
+>22
+TTATTATCTATTTTAATTTTTCTTAA
+>23
+TATTATCATCTCGTTCTTCCTTCTC
+>24
+TCAACTGGCAAGAATTTTTGAAAATT
+>25
+TACAAATTTTTTTTTCTTTCTTAT
+>26
+TTTTCTTTTTTTCATTTTCTCTTTTA
+>27
+AAGATGGAGTAGTTTTTT
+>28
+TCTCATCTTACAATTTTTTAAAACTT
+>29
+CATACCTACAAAAAAGCTTCTCTTAC
+>30
+TTTTATATTTCCTTATATCTTTACTA
+>31
+GGAGATTGTAGAACGAAAGGAAAAT
+>32
+TCTATTTCTTTATTTTTTTTATTAT
+>33
+CGGACGGTATATTTTTTAATATAA
+>34
+TTCTTGGACTACACATTTTTTATTGTTTTA
+>35
+TACTATATACTTCTTCAAATCACA
+>36
+ATTTTACTTCATCATTTTC
+>37
+TATTTCCAACCTTCAACCTCAAATAA
+>38
+CACGACTTTATTCTTTTTATCTCA
+>39
+TTCTTTTACTACATATTTTTTATTTTTTTA
+>40
+TTTATTACAACCCTATCTTACCTCAA
+>41
+CGATATTTTCTCCTCGTACC
+>42
+TAATTACCATTGCTAACTATCCA
+>43
+CATTAATTCATCCATTTAAACTAA
+>44
+GAAACAAACAACACATACCCTCTGGC
+>45
+TACTTTTTTCTTAATTTTTTATTAAAC
+>46
+TAACTTTAACTTTTTTACT
+>47
+TTCTTTTACTACATATTGTTTATTTTTTTA
+>48
+TCTAGTCTGAGCGTAGTACCAGATTG
+>49
+TTTTTTATCAATTTTCACCATTCAT
+>50
+AATGACACACTCTTCATCAAC
+>51
+TAACATAAATTTTAATCATAAATTG
+>52
+TCTATATTATTTTTATCAATTTTCACC
+>53
+TCCCAACCCTCGAGCATCATTTTC
+>54
+TAGTCATACATACCTAATTATACATA
+>55
+TACAAAAAATGCGAAAATTGACCCT
+>56
+GAGAACTTTTAATCATTTTAC
+>57
+TCTTATTTTAATCTTCCAATTTC
+>58
+CGGCACATGTTGAATTACACTCA
+>59
+CAGTTTCACAAAAGATCTTTTAA
+>60
+GCCAACGACCATACCACGA
+>61
+CAAATAACAAACTGAATAAACGAAA
+>62
+TAGTTTCACTACTTTATTCTTTTTA
+>63
+TGAGGAAAACAGAAAAATGAGAGACA
+>64
+TATATAAATCTTCAACATCAA
+>65
+TGATTTACTTACATTCTTTTTTT
+>66
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+>67
+TACTGAAAACGGGCGCATATCAGTGG
+>68
+TATTCAATCACTCCATTATATATAACA
+>69
+TATATTGCCTCCCCATAATCCTT
+>70
+TCGCCGTAAAGCCAGTCGTTCTCC
+>71
+TTTAAACACTTCCTACATCAAATTTC
+>72
+TTTGTTTTTTACTATATTT
+>73
+TCTTTTTTTTAATACTTATTTTCATT
+>74
+TAAGTTTTTAATCATTTTTTTT
+>75
+TGAGAATGACTTCTTCACGATCTCTT
+>76
+AATTTATTTAATTTATATTCTAACTAA
+>77
+AAAAAATATCTTTTTTAACTCGTGGCC
+>78
+AAGACAACAATGACATATAAGACG
+>79
+TGAGCTAGAACTGCACCCACTCCA
+>80
+ACTAAACTTTTCTTACCATATTTCTA
+>81
+TATTTCACTTTATACTTCCTTAA
+>82
+TATCTATCTTTGATCTTCTTTTCA
+>83
+TTTTTATCAATTTTCACCATTC
+>84
+TAAAAATAATTGTCTTTAATTTCA
+>85
+TTAAAGACGCAACAACTAACATT
+>86
+TAACCTTGCAGAACTATACGATTCAAA
+>87
+TACTATTTTATTATACATACATACATTA
+>88
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+>89
+AATGTCACTTGAAGAATTCACGT
+>90
+TAATAAGAAACTGTTCAAACAATCCAC
+>91
+AAGCACGCCTTACCACAATTTATAA
+>92
+CCATATATGACTGACTCATTTCAC
+>93
+TTAATATGTAATTTCATACCTCAC
+>94
+AGGATTTTTAAGCCCATATGTTTCC
+>95
+ACAGCAGGACGGTGATCA
+>96
+TGATGACGGGCAGCAGGGATTTTC
+>97
+TTGTAAAACATTCTTTCTCCTGAC
+>98
+TTCTTCCTTTTATCCTCTCTTAA
+>99
+ATTATTAATAAATTATTATAA
+>100
+CTGGAAACTATTGATCAAATT
+>101
+TACAACTAACATCCTTTCTTCTTCC
+>102
+TCAAATGCAAATTGGATTTATGA
+>103
+TCCTGAGGACGAGGGGCGTTTAGC
+>104
+TACACAGACTTACAAAACACATCCTTC
+>105
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+>106
+TTAAACAATTTGGAATTAATT
+>107
+TAGCCTTTACTAGGCTTTTTCTAA
+>108
+CGATATTTTCTCCTCTTACCT
+>109
+TGGAATGTAAAGAAGTATGGAG
+>110
+TTTTTAACTCCCATCATTTTTCCTC
+>111
+TTTTTTATCATTTTTCACCTAAAAAA
+>112
+GAACAATTTTTCAATTTTTTACATTA
+>113
+TATGGATTATTTCAAAATTTTTTTTT
+>114
+TAACGGAGCACGAGAACGAAGTGG
+>115
+CAATTTTTAATTCCTTTTTTCTTCTT
+>116
+AGATGTTGATCTAAACTCTCCCA
+>117
+TGAGCGGAGAACCAGAGTTGATGAGC
+>118
+TAACAAATAGAACGTTCTAATTTAAA
+>119
+CTAGACAAGATGCTATAAATTTTAAA
+>120
+TTTTCTTTACCCATCTTTACTTTCCC
+>121
+TACACACTCATCAACCAAAGGACG
+>122
+TCTTATTATCATTTTTTTATCCCTT
+>123
+TCAGAGTTCTACAGGTCCTACGATT
+>124
+TTTATTACTTAGTCATAATTCCAA
+>125
+GCCGGGGCGTGAGATGTCTGCATTA
+>126
+GACGAAACGCAACAACAAAATGGACG
+>127
+TAGACTTTCTACTCATTATTAC
+>128
+AAATTGCAAAGATGGAAAATAAAACT
+>129
+CCTTACTCAACATACTTAATCATACTTA
+>130
+TGCTTGGACTACATATGGTTGAGTG
+>131
+GAATGATCGCACCACCACCTCAACGTT
+>132
+TCCACCTATTTATCTTTTCTT
+>133
+TGGACAAGAACCACGCGACGGGTGT
+>134
+CAAGATATGAACAAAGCAAAGACAC
+>135
+CAAACGGAACAAGACATCACCATC
+>136
+NATTCTTACTCCATTTCAATTTACT
+>137
+TAGAACTCGAACCAGAGCTCC
+>138
+CGGAAAAGAATGTAGACCATTTAA
+>139
+TACAAACGGAACTTTCGTCATAA
+>140
+GGTATCTTTATATTTTAATTTTCTT
+>141
+TATTCCGACAATACCTTCTTTAC
+>142
+AACTTTAAATTTTTAATAACCTT
+>143
+CATAATATAAACTTATCTT
+>144
+TATTTATAAATTTTTTCTTGAGAC
+>145
+TTTTTTTTTTGTTTTTATTTTTATCAT
+>146
+TATTATACATAGAATAACAAATCTTT
+>147
+TGGAGTAGCACAGTCGTCTGAAATC
+>148
+TATTTCTTTTTTAACTTCTTTTC
+>149
+TTATAATCACGGCACCCTATACA
+>150
+TTCTTTTACTACATATTTTTTATTTTT
+>151
+TAGCGAGATGGACCAACGTGCTGT
+>152
+CCAGAAAACAATACAACATCCTCA
+>153
+TCCGAAAACAAGGCCCGTCGCT
+>154
+TACTCAATAGAACTCTACTCACTCATA
+>155
+AACGGGGAATAAGGGTTCG
+>156
+TCAGTCTTTTTTTCTCTCCTA
+>157
+AATATAAAAATACAATCAACCATTGCA
+>158
+GGACGGAGAACTGATAAGGGCA
+>159
+TAAAGAAGAAGAATTGATTTTAAT
+>160
+TCATTACACTTCTTACAAAAC
+>161
+CCGCGATCTGCTTATTTATAATCTT
+>162
+TCTAAACACCCACGAAAATCTCTTAC
+>163
+AACAGGAAAAACAGAAGGATTTCTA
+>164
+TCTCTTTTATTTTTATCTTTCCTT
+>165
+AACATTTTATCAATTATACATTA
+>166
+GCAGATAGAAATCAATACAAAAATC
+>167
+TTAATGACACACGGGAAAAACACCG
+>168
+TACAGACAACACATACGGACTTAAGT
+>169
+TCCACAACAACTCTATCTAAAGCATT
+>170
+ATAAAGCTAGATTACCAAAGCAT
+>171
+TACCTCTTTATTAACCTCCACCTCTA
+>172
+TACACCTCTTTTTACTTTTTTATT
+>173
+CACCGAACCGGGAAGGCGAACAAC
+>174
+TAGGTACTTACCTTTTTTTTACACAA
+>175
+AGGAATATGATGAAATAAAAAAAT
+>176
+TATTATTTTTTTATTCCATTCATAT
+>177
+TAAAATAAATAAGTCCGACGACAA
+>178
+TCTGTATTTGACTTATTACTTTCTCC
+>179
+AAGGAATTAAAGCAATAATTCTAA
+>180
+TTCATTTTATTTTTAAATATCTTTTTT
+>181
+TTAGTATTAATCTTCACTTAA
+>182
+TATGGAGAAACAGCGATATAAGTCA
+>183
+CCCGAAAAGCCGAGGACGACTTA
+>184
+CACAGACTGAGGCAGAAAAAACAA
+>185
+TCAAGCCTTTTGAAGAACTGACCTAAA
+>186
+TAAGAAACTGAGCTAACGCAATGTACC
+>187
+CGATATTTTCTCCTCTGACC
+>188
+TAACTACTTTTACATTAATACTAA
+>189
+ACCAGCACCTTCCGACTCAACGTCAAA
+>190
+TCAAAGAACAATGTAAAGCCGCGAC
+>191
+TTGATTCTTCTTTTTCACAAAA
+>192
+TACAAAACAAACAAATTACAATCTAAA
+>193
+GAAACCATTATCTTATCTTTATACA
+>194
+CTATATTTTCTCTCTTACC
+>195
+TCAGAAGAACAGAGAATTGATTTT
+>196
+TAATAAATTATTAAATAAAAAAAAAA
+>197
+CCGACCGAGCAAATAAACACAGGAACG
+>198
+AAGTGAAGAAGTAGTTTTT
+>199
+TGGACGGAGAACTGATAAGGGC
+>200
+TGCTTGGACTACATATGGTTGAGGG
+>201
+TACAAGACTAAAACAAACGTGAAGT
+>202
+TGAAACTGAAACTAACATACAAAATATT
+>203
+TATCTGATCAACAATCTTTTCCCAT
+>204
+TTTAAGACTTATGAGCTTG
+>205
+CAAGGCTCAGAAGAACATCACCAAGACC
+>206
+TTCAAGTAGATTGCATTTTTTAATA
+>207
+CGCAACCAGCAGCAACTCCTAGCAT
+>208
+TACAAACGGAACTTTCTTCATAACTTC
+>209
+ACAAATCATAAATTTTTTTTTACT
+>210
+TCCGAAAAATCGTAGGACCCGGGCA
+>211
+CGCCGCAAGATGAATACTCTAATGA
+>212
+TGACCAAAGACAAACAAACAATAAATA
+>213
+AAAAGGAAAAACAGAAAAATTGGG
+>214
+TTCTTGATAACGCATCTTCTACAT
+>215
+TACTGAAACAAGGAAACACAAGC
+>216
+TCAAAAAGTAATAGGGATCGTTA
+>217
+TGGAATGTAAAGAAGTATGGAG
+>218
+TAAAATTGTAATATTTAAATAATAT
+>219
+GAGGATTAAAAGAACGGTTTATAA
+>220
+TTCTTTGACTACATATTTTTTATT
+>221
+TACAAACGTAATTTTCGCATAACATC
+>222
+AGAACAATTAAATAAAATAGCATA
+>223
+TAATAATTTAAATAAATATAAATTT
+>224
+AATTGCAACAGAGACTGGAA
+>225
+TTAAGTTTTAGACATAATCTATTACAA
+>226
+TGAAAGGAAAAACAGGACACGGGA
+>227
+AACAGGGAGATCAACAGCGTTGACA
+>228
+GGACGGAGAACTGATAATGGC
+>229
+TGTGTAATCTTTCTACTTCTTCTAC
+>230
+TTTACCAGAGGAGTCGAGTTTTT
+>231
+AAAATCGACTGCCGAAAACATTTTAA
+>232
+TGCTTGGACTACATATGGTTGAGTG
+>233
+TACAGACAACACATACGGACTTAA
+>234
+GAAGAGGAGGAGGAGTTTGTAAG
+>235
+TTACAATCTACTATTCTTTTATTA
+>236
+TTATTACATCGTCCACATATAACAAAA
+>237
+TTTTTAACTCATTTTACAATTAAAC
+>238
+AAAACCCGGACAAACCATCGGAGGA
+>239
+CAACACATGACGCGACAATTCTTG
+>240
+TACACACTCATCAACCAAAGTACGTA
+>241
+TTTTCATTTCTTCTTCAAATCCTTT
+>242
+AACAACTGCAAACATCTACCACA
+>243
+TAGTTTTAAATATTTCTTTTTTTC
+>244
+AAAGACAAAAGAAATACAGGCACT
+>245
+TTTGATACCTTTATACCATACCTATT
+>246
+ACAGCAGGACGGTGATCA
+>247
+TTCTAGCACAACACGCACACATATA
+>248
+TAATCAAAAAACTCTTCATTTTTA
+>249
+TAGGAACTTCATACCGGTCTC
+>250
+TATTTCAGCAACAGACTAAGACTAA
b
diff -r 000000000000 -r a8aacccd79a3 test-data/input.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fastqsanger Mon Sep 04 07:13:28 2017 -0400
b
b"@@ -0,0 +1,1000 @@\n+@HWI-49\n+TAAAGCTAGATTACCAAAGCAT\n++\n+BBBA;ACB9ABCBABB@@/?BB\n+@HWI-119\n+TGGACGGAGAACTGATAAGGGC\n++\n+?03@?82?B>C@B>@CC?032<\n+@HWI-127\n+TGGACGGAGAACTGATAAGGGC\n++\n+A>9@>;9>:=@3A7@BB#####\n+@HWI-135\n+TGGACGGAGAACTGATAAGGGC\n++\n+BB6BA5AC+BBBCC6@B>C<'<\n+@HWI-136\n+TGGACGGAGAACTGATAAGGGC\n++\n+B22<A?85=CBCB?;BB===88\n+@HWI-161\n+TGGACGGAGAACTGATAAGGGC\n++\n+B@>>A=?;7:;BB>=C@>)1(:\n+@HWI-166\n+CGGCACATGTTGAAGTACACTCAA\n++\n+B<2>BCBCACC@C@3@BBCBCCCB\n+@HWI-187\n+TGGACGGAGAACTGATAAGGGC\n++\n+B;>@B82:4;B?B?@BB=5:2<\n+@HWI-199\n+GGGAGCGAGACGGGGACTCAC\n++\n+@?@@8?6;@AB=72;;=<AB>\n+@HWI-36\n+ATAAAGCTAGATTACCAAAGCAT\n++\n+BBBB?AAAA9AABA>7@@:-9AB\n+@HWI-39\n+AATGGCACTGGAAGAATTCACGG\n++\n+54=4440)@0365:83:<;5###\n+@HWI-227\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BB>ACCBCCC@9@A4>CA66@>\n+@HWI-249\n+TGGAATGTAAAGAAGTATGGAG\n++\n+A9>?BA2@AA>5<<4<BA5.:)\n+@HWI-252\n+TGGAATGTAAAGAAGTATGGAG\n++\n+>A?:BB:BBA?9<;4?B>3-78\n+@HWI-254\n+TGGAATGTAAAGAAGTATGGAG\n++\n+B>A=B@1>B?47<?B9A?09?0\n+@HWI-278\n+TGGACGGAGAACTGATAAGGGC\n++\n+A54;A<3:?9A@A7<BA?755=\n+@HWI-216\n+TGGACGGAGAACTGATAAGGGCA\n++\n+?34@><6=A<B?@5=A?<30./;\n+@HWI-307\n+TGGACGGAGAACTGATAAGGGC\n++\n+B>??BB;@<@BABB@CC==:0?\n+@HWI-332\n+TGGACGGAGAACTGATAAGGGC\n++\n+B?8@A>B;BAB@BA?CB@A731\n+@HWI-211\n+TGGAATGTAAAGAAGTATGGAGA\n++\n+A61=BA;7B@BBA:9:B?;;@2=\n+@HWI-333\n+TGGAATGTAAAGAAGTATGGA\n++\n+BA>>AB8@BB?=>@98BBA>#\n+@HWI-355\n+TGGAATGTAAAGAAGTATGGAG\n++\n+B>8BBA7@CBA;>A2?BB2.>8\n+@HWI-282\n+TGGAATGTAAAGAATTATGGAG\n++\n+B>8>@A7@8=>>997-8A72##\n+@HWI-378\n+TGGAATGTAAAGAAGTATGGAG\n++\n+B7=;AA8=A@@5=;4>BB31:2\n+@HWI-372\n+AATGGCACTGGAAGAATTCACGGG\n++\n+BBA;@;>A@;@=?@:?B@######\n+@HWI-388\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BA@?BB?BBB??A?0:BA53<>\n+@HWI-399\n+TGGAATGTAAAGAAGTATGGAG\n++\n+A;*>?B9@B<+1<@<4B@7)@-\n+@HWI-109\n+TGGACGGAGAACTGATAAGGGC\n++\n+BC@CBCC@CBB9@8?CCC>AA;\n+@HWI-421\n+TGGACGGAGAACTGATAAGGGC\n++\n+A66?A;0<@;B?A;?BB>38-7\n+@HWI-412\n+TGTGATGTGACGTAGTGGAA\n++\n+BA=?>B<>9><09?0=656=\n+@HWI-432\n+TGGACGGAGAA
CTGATAAGGGC\n++\n+A;;B>=;=2@?0@@>?CBA@2=\n+@HWI-442\n+TGGACGGAGAACTGATAAGGGC\n++\n+BBA?=;;>B6??@07AA#####\n+@HWI-470\n+TGGAATGTAAAGAAGTATGGAG\n++\n+B?3>AC>:AA?AA<)7BA;7=:\n+@HWI-499\n+TGGACGGAGAACTGATAAGGGC\n++\n+BA>??=:;B=B8A>>B9=/55=\n+@HWI-100\n+TGGACGGAGAACTGATAAGGGC\n++\n+A;6@@96@;?A@BBABC?3<5>\n+@HWI-514\n+TGACTAGATCCACACTCATTAA\n++\n+BAABC?7@C@ABABBBBABBBB\n+@HWI-521\n+TGGACGGAGAACTGATAAGGGC\n++\n+A2=A5=2=>@B;C>:7A>>:1.\n+@HWI-545\n+TGGACGGAGAACTGATAAGGGC\n++\n+BA?BBBAABAB@B>?BB@>A><\n+@HWI-549\n+TGGACGGAGAACTGATAAGGGC\n++\n+A66<;;0<>:@4A1<>>#####\n+@HWI-431\n+TATCACAGCCAGCTTTGAGGAG\n++\n+B>B?B?@A?69?38<@5902;9\n+@HWI-579\n+TGGAATGTAAAGAAGTATGGA\n++\n+BBB@A@BB>B=1<?>>BA###\n+@HWI-583\n+TGGACGGAGAACTGATAAGGGC\n++\n+BB:@BCB>>BBAB@>@BA8;00\n+@HWI-587\n+TAGCACCACATGATTCGGCT\n++\n+BA;/A;;A1BA:@BB>=4;?\n+@HWI-516\n+TGGACGGAGAACTGATAAGGGCA\n++\n+B??>>AA>A9A3A69B?;64779\n+@HWI-113\n+TGGACGGAGAACTGATAAGGGCA\n++\n+BBC@@;>BCBCBB*BCC@95**C\n+@HWI-592\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BBBBBB?ABA;9@@2=B@)1=4\n+@HWI-602\n+TGGACGGAGAACTGATAAGGGC\n++\n+BA;>A@>=8=@?BB>B?:0906\n+@HWI-608\n+TGGACGGAGAACTGATAAGGGC\n++\n+BBC>CC9>??CAA9=B@=::36\n+@HWI-590\n+GGGAGCGAGACGGGGACTCACT\n++\n+=B=AAAA?CBAB@=A95=%:0;\n+@HWI-612\n+CGGCACATGTTGAAGTACACTCA\n++\n+B?>BBBCBABCCA=7@AAABBAA\n+@HWI-610\n+TCAGGTACCTGAAGTAGCGCGCG\n++\n+BB@;@2>;>?66>963#######\n+@HWI-629\n+TGGACGGAGAACTGATAAGGGCA\n++\n+A68@?;6<0<A=A99BB;'--8;\n+@HWI-170\n+TGGACGGAGAACTGATAAGGGC\n++\n+BABCB;;=;=B@BAACB@)80?\n+@HWI-382\n+TGGACGGAGAACTGATAAGGGCAT\n++\n+BB>C39>;B4B@CB73@@;<(>BB\n+@HWI-648\n+TGGACGGAGAACTGATAAGGGC\n++\n+A8=A?@9<A=A?A;=B>=275<\n+@HWI-377\n+TGGACGGAGAACTGATAAGGGCAA\n++\n+B>5>4?A?A;>0:9<=0;;:0.88\n+@HWI-658\n+TGGAATGTAAAGAAGTATGGAG\n++\n+B>8>AB9@B?AA>=;>AA82<8\n+@HWI-677\n+TGGAATGTAAAGAAGTATGG\n++\n+A:0<B>)<BB@2=>2=AB<4\n+@HWI-684\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BAA?BBA@BA==?>8@B?02:)\n+@HWI-699\n+TGGACGGAGAACTGATAAGGGC\n++\n+B??B@>;<;>B?BB?AB>####\n+@HWI-707\n+TGGAATGTAAAGAAGAATGGAG\n++\n+BBAB9*===@7;;:(.,B??@;\n
+@HWI-710\n+TGGACGGAGAACTGATAAGGGC\n++\n+BA?AA?8>A=B?BA>AA?####\n+@HWI-700\n+TGGACGGAGAACTGATAAGGGC\n++\n+BCB>4BB@8<A<A>=CB>130-\n+@HWI-780\n+TGGACGGAGAACTGATAAGGGC\n++\n+BBAABCAA<ABAB@?A>>0/29\n+@HWI-766\n+TGGACGGAGAACTGATAAGGGCAA\n++\n+A;3=C@-<A@CCA;@CC<00;@A"..b"\n++\n+BB@B@AA?A@B?B:>AB@943\n+@HWI-1780\n+TGGACGGAGAACTGATAAGGGCAA\n++\n+B;;BA@9?BAB?=-<@>@3:4>;?\n+@HWI-1800\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BAB?BBA@B@:)9=AAB@34?0\n+@HWI-1806\n+TGGAATGTAAAGAAGTATGGA\n++\n+BBBBCCABCCA=AA<;CB@2>\n+@HWI-1821\n+TGGAATGTAAAGAAGTATGGAG\n++\n+B@8?AB;@BB>;>>;;@@:2=7\n+@HWI-1766\n+TGGACGGAGAACTGATAAGGGC\n++\n+BCACCCC?:C@7BAAACB6>2<\n+@HWI-1830\n+TGGACGGAGAACTGATAAGGGC\n++\n+BA@BB>@>>@B@A5>@B@866;\n+@HWI-1846\n+TGGACGGAGAACTGATAAGGGC\n++\n+BB?;5BA?@8B;>>@A9<####\n+@HWI-1851\n+TGGAATGTAAAGAAGTATGGA\n++\n+BBB@BBBABB@3??A@AB###\n+@HWI-1856\n+TGGAATGTAAAGAAGTATGGAG\n++\n+B@:=@B?BBB=:>@@>BB=:>;\n+@HWI-1873\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BB@?BB@BBB???=>=B@2)<=\n+@HWI-1876\n+TGGACGGAGAACTGATAAGGGC\n++\n+A==B@?B;?>B=72;B==/<##\n+@HWI-1885\n+TGGAATGTAAAGAAGTATGGAG\n++\n+A74@BCBABAA:<?03BB:3=2\n+@HWI-1888\n+TGTGATGTGACGTAGTGGAA\n++\n+B@@:>>1<:62;?=7928=@\n+@HWI-1889\n+TGGACGGAGAACTGATAAGGGC\n++\n+A>;@B@;@=@CAB;@BB?330:\n+@HWI-1894\n+TAAGGAAATAGTAGCCGTGAT\n++\n+BAA>><AAA=<<>########\n+@HWI-1912\n+TGGACGGAGAACTGATAAGGG\n++\n+=<??67;8B>A4?A=)>1@?:\n+@HWI-1914\n+TAAGGAAATAGTAGCCGTGAT\n++\n+BBBB7BB>9B;=AA:A>####\n+@HWI-1915\n+TAGGAACTTCATACCGTGCTCT\n++\n+BC=9@BBCCBCBCBA*?3?BAB\n+@HWI-1931\n+TGGACGGAGAACTGATAAGGGCAA\n++\n+A6;@B=5@=@BBCB?B@A13)=9@\n+@HWI-1940\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BC@BBBCBBAA@;?7<BA67?B\n+@HWI-1939\n+TGTGATGTGACGTAGTGGAA\n++\n+B?B??B<B??=8A=8>757:\n+@HWI-1943\n+TGTGATGTGACGTAGTGGAA\n++\n+BA<:<B<;5;94:<-<7###\n+@HWI-1949\n+TGTGATGTGACGTAGTGGAA\n++\n+BA?>7BB>>=?AA<9?>>5;\n+@HWI-1955\n+TGGAATGTAAAGAAGTATGGAG\n++\n+B?;>AB;>BA???>49BA92=2\n+@HWI-1884\n+TGGACGGAGAACTGATAAGGGC\n++\n+B==A=3<AB@B<A;AB@=5/5?\n+@HWI-1973\n+TGTGATGTGACGTAGTGGAAA\n++\n+A9><=B>;8877><9?:####\n+@HWI-19
83\n+TGGACGGAGAACTGATAAGGGC\n++\n+B=1@B@6:B@B@BB=BBBA@;@\n+@HWI-1980\n+TGGACGGAGAACTGATAAGGGC\n++\n+BABBB=>>6?C@BBBBA=(52@\n+@HWI-1992\n+TGTGATGTGACGTAGTGGAA\n++\n+BA@9=@7@A?<>>?:@7:9>\n+@HWI-1995\n+CAAATTCGGTTCTAGAGAGGTT\n++\n+B<ABACBCA<C@A@==;=BBAB\n+@HWI-2007\n+TGGACGGAGAACTGATAAGGGT\n++\n+B?BBBA6ABBB?CBACB?AA?>\n+@HWI-2011\n+ACTGAATTCTCGTGGGTCTGCAT\n++\n+BAB==B?B@A@8;/.(:?A>:<?\n+@HWI-2017\n+TCAGGTACCTGAAGTAGCGCGCG\n++\n+BC@?727=B@5<<##########\n+@HWI-2013\n+AATTGCACTAGTCCCGGCCTG\n++\n+BCA>5;A=A>?=B@@4:?@8'\n+@HWI-2040\n+TGGACGGAGAACTGATAAGGGC\n++\n+BBBBB=BBB9BBB@?BB:88>=\n+@HWI-2049\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BA>=ABAABB>5><0;@?.-94\n+@HWI-2053\n+TATCACAGCCATTTTGACGAGTT\n++\n+BCBBCBC>A@ABCBC@@=;</:C\n+@HWI-2055\n+TATTGCACTTGAGACGGCCTTA\n++\n+BBCC>BCBCBAA;<B</-=@2:\n+@HWI-2104\n+TGGACGGAGAACTGATAAGGGC\n++\n+A9@B@AA>A?A?BAACA=805;\n+@HWI-2106\n+TGGACGGAGAACTGATAAGGGCT\n++\n+A:5;BA8;A=A@?4;BA93-0:=\n+@HWI-2100\n+TCAGGTACCTGAAGTAGCGCGCGTTAT\n++\n+BB@579A@B<3;=07615830,6>=##\n+@HWI-2072\n+GGACGGAGAACTGATAAGGGCTCGG\n++\n+ABCBBA.3@@:A=@B=@@0<;@;.<\n+@HWI-2124\n+TGGACGGAGAACTGATAAGGGC\n++\n+?00@B@1>?AB@>9;B?>503?\n+@HWI-2136\n+TGGACGGAGAACTGATAAGGGC\n++\n+BBBBBBCABB@>BB?CBB?>>:\n+@HWI-2137\n+TGTGATGTGACGTAGTGGAA\n++\n+B>=<:<-149;8>8.93379\n+@HWI-2141\n+TGTGATGTGACGTAGTGGAAA\n++\n+BB@AAB<@8>=+=@1=/5=B?\n+@HWI-2117\n+CGGCACATGTTGAAGTACACTCA\n++\n+B>ABBABA>@?)=?0>CABABBB\n+@HWI-2146\n+TGGACGGAGAACTGATAAGGGC\n++\n+B;;BB4=ABBCBB8@CC;/5;B\n+@HWI-2148\n+TGGACGGAGAACTGATAAGGGC\n++\n+BBABA??@@AA:B@@AB@6=8=\n+@HWI-2151\n+TGGACGGAGAACTGATAAGGGC\n++\n+BCA@>BCB?3ABBA?B?A7@6B\n+@HWI-2145\n+TGTGATGTGACGTAGTGGAA\n++\n+B>?BBB>A>A;9<@1@559<\n+@HWI-2158\n+AATGGCACTGGAAGAATTCACGGG\n++\n+BCBABBBB?50=?8;A=>9>;###\n+@HWI-2142\n+TGGACGGAGAACTGATAAGGGC\n++\n+A4;@<@?=<@>@BB>BB8<0:7\n+@HWI-2140\n+TCAGGTACCTGAAGTAGCGCGCG\n++\n+BBABB?B=>=0<@>7<394:4:4\n+@HWI-2191\n+AATGGCACTGGAAGAATTCACGTG\n++\n+ABB?A@ABA;5;>8<??#######\n+@HWI-2183\n+TGGACGGAGAACTGATAAGGGC\n++\n+BAA?@>@>:;>=A>>BA:3107\n+@HWI-22
03\n+TGGAATGTAAAGGAGTATGGAG\n++\n+BBABBC@?@A83?A>BBB>7?:\n+@HWI-2200\n+TCTTTGGTATTCTAGCTGTAGA\n++\n+BBCBCC@BACC?CB:AB5@###\n+@HWI-2209\n+TGGACGGAGAACTGATAAGGGC\n++\n+BAAA?A>>=;@;B;>@@>5/94\n+@HWI-2218\n+TGGAATGTAAAGAAGTATGGAG\n++\n+BC>BBCBBBC?A?@<>BB2);7\n+@HWI-2222\n+TGGACGGAGAACTGATAAGGGCA\n++\n+BCABBB?@BBCBBAABB@94;@@\n+@HWI-2227\n+TGGACGGAGAACTGATAAGGGC\n++\n+BBBBBBB?B@BAA;>BBA:0;>\n+@HWI-2225\n+AATGGCACTGGAAGAATTCACGG\n++\n+ABBA@?B@:6>;?@9@@B=?::?\n+@HWI-2230\n+TGGACGGAGAACTGATAAGGGC\n++\n+BBBB@CB@?>B?A;>=@>2/7;\n"
b
diff -r 000000000000 -r a8aacccd79a3 test-data/input.sorted.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.sorted.fa Mon Sep 04 07:13:28 2017 -0400
b
@@ -0,0 +1,500 @@
+>1
+>10
+>100
+>101
+>102
+>103
+>104
+>105
+>106
+>107
+>108
+>109
+>11
+>110
+>111
+>112
+>113
+>114
+>115
+>116
+>117
+>118
+>119
+>12
+>120
+>121
+>122
+>123
+>124
+>125
+>126
+>127
+>128
+>129
+>13
+>130
+>131
+>132
+>133
+>134
+>135
+>136
+>137
+>138
+>139
+>14
+>140
+>141
+>142
+>143
+>144
+>145
+>146
+>147
+>148
+>149
+>15
+>150
+>151
+>152
+>153
+>154
+>155
+>156
+>157
+>158
+>159
+>16
+>160
+>161
+>162
+>163
+>164
+>165
+>166
+>167
+>168
+>169
+>17
+>170
+>171
+>172
+>173
+>174
+>175
+>176
+>177
+>178
+>179
+>18
+>180
+>181
+>182
+>183
+>184
+>185
+>186
+>187
+>188
+>189
+>19
+>190
+>191
+>192
+>193
+>194
+>195
+>196
+>197
+>198
+>199
+>2
+>20
+>200
+>201
+>202
+>203
+>204
+>205
+>206
+>207
+>208
+>209
+>21
+>210
+>211
+>212
+>213
+>214
+>215
+>216
+>217
+>218
+>219
+>22
+>220
+>221
+>222
+>223
+>224
+>225
+>226
+>227
+>228
+>229
+>23
+>230
+>231
+>232
+>233
+>234
+>235
+>236
+>237
+>238
+>239
+>24
+>240
+>241
+>242
+>243
+>244
+>245
+>246
+>247
+>248
+>249
+>25
+>250
+>26
+>27
+>28
+>29
+>3
+>30
+>31
+>32
+>33
+>34
+>35
+>36
+>37
+>38
+>39
+>4
+>40
+>41
+>42
+>43
+>44
+>45
+>46
+>47
+>48
+>49
+>5
+>50
+>51
+>52
+>53
+>54
+>55
+>56
+>57
+>58
+>59
+>6
+>60
+>61
+>62
+>63
+>64
+>65
+>66
+>67
+>68
+>69
+>7
+>70
+>71
+>72
+>73
+>74
+>75
+>76
+>77
+>78
+>79
+>8
+>80
+>81
+>82
+>83
+>84
+>85
+>86
+>87
+>88
+>89
+>9
+>90
+>91
+>92
+>93
+>94
+>95
+>96
+>97
+>98
+>99
+AAAAAATATCTTTTTTAACTCGTGGCC
+AAAAACACACAGACACAAGCAGCAAT
+AAAACCCGGACAAACCATCGGAGGA
+AAAAGGAAAAACAGAAAAATTGGG
+AAAATCGACTGCCGAAAACATTTTAA
+AAAGACAAAAGAAATACAGGCACT
+AAATTGCAAAGATGGAAAATAAAACT
+AACAACTGCAAACATCTACCACA
+AACAGGAAAAACAGAAGGATTTCTA
+AACAGGGAGATCAACAGCGTTGACA
+AACATTTTATCAATTATACATTA
+AACGGGGAATAAGGGTTCG
+AACTCTCTAATTTAACTTTGTGC
+AACTTTAAATTTTTAATAACCTT
+AAGACAACAATGACATATAAGACG
+AAGATGGAGTAGTTTTTT
+AAGCACGCCTTACCACAATTTATAA
+AAGGAATTAAAGCAATAATTCTAA
+AAGTGAAGAAGTAGTTTTT
+AATATAAAAATACAATCAACCATTGCA
+AATCTACAATTTCCATTACGACTCC
+AATGACACACTCTTCATCAAC
+AATGTCACTTGAAGAATTCACGT
+AATTGCAACAGAGACTGGAA
+AATTTATTTAATTTATATTCTAACTAA
+ACAAATCATAAATTTTTTTTTACT
+ACAACCTCAACTCATATTT
+ACAGCAGGACGGTGATCA
+ACAGCAGGACGGTGATCA
+ACCAGCACCTTCCGACTCAACGTCAAA
+ACTAAACTTTTCTTACCATATTTCTA
+AGAACAATTAAATAAAATAGCATA
+AGATGTTGATCTAAACTCTCCCA
+AGGAATATGATGAAATAAAAAAAT
+AGGATTTTTAAGCCCATATGTTTCC
+ATAAAGCTAGATTACCAAAGCAT
+ATGAAATTCGAACAATACGTC
+ATGTTATTTACTTTTTCCCCTTATA
+ATTATTAATAAATTATTATAA
+ATTTTACTTCATCATTTTC
+CAAACGGAACAAGACATCACCATC
+CAAATAACAAACTGAATAAACGAAA
+CAACACATGACGCGACAATTCTTG
+CAAGAATACAAAAAATACTAATTA
+CAAGATATGAACAAAGCAAAGACAC
+CAAGGCTCAGAAGAACATCACCAAGACC
+CAATTTTTAATTCCTTTTTTCTTCTT
+CACAGACTGAGGCAGAAAAAACAA
+CACCGAACCGGGAAGGCGAACAAC
+CACGACTTTATTCTTTTTATCTCA
+CAGATTCACTGATTTTCTTACGCC
+CAGTTTCACAAAAGATCTTTTAA
+CATAATATAAACTTATCTT
+CATACCTACAAAAAAGCTTCTCTTAC
+CATACCTTAAATTATCTCTTTCTT
+CATTAATTCATCCATTTAAACTAA
+CCAGAAAACAATACAACATCCTCA
+CCATATATGACTGACTCATTTCAC
+CCCGAAAAGCCGAGGACGACTTA
+CCGACCGAGCAAATAAACACAGGAACG
+CCGCGATCTGCTTATTTATAATCTT
+CCTTACTCAACATACTTAATCATACTTA
+CGATATTTTCTCCTCGTACC
+CGATATTTTCTCCTCTGACC
+CGATATTTTCTCCTCTTACC
+CGATATTTTCTCCTCTTACCT
+CGCAACCAGCAGCAACTCCTAGCAT
+CGCCGCAAGATGAATACTCTAATGA
+CGGAAAAGAATGTAGACCATTTAA
+CGGACGGTATATTTTTTAATATAA
+CGGCACATGTTGAATTACACTCA
+CTAGACAAGATGCTATAAATTTTAAA
+CTATATTTTCTCTCTTACC
+CTGGAAACTATTGATCAAATT
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+GAAACAAACAACACATACCCTCTGGC
+GAAACCATTATCTTATCTTTATACA
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+GAACAATTTTTCAATTTTTTACATTA
+GAAGAGGAGGAGGAGTTTGTAAG
+GAATGATCGCACCACCACCTCAACGTT
+GACGAAACGCAACAACAAAATGGACG
+GAGAACTTTTAATCATTTTAC
+GAGAATAAATATTTCAATGGTCTATTG
+GAGGATTAAAAGAACGGTTTATAA
+GCAGATAGAAATCAATACAAAAATC
+GCCAACGACCATACCACGA
+GCCGGGGCGTGAGATGTCTGCATTA
+GGACGGAGAACTGATAAGGGCA
+GGACGGAGAACTGATAATGGC
+GGAGATTGTAGAACGAAAGGAAAAT
+GGTATCTTTATATTTTAATTTTCTT
+NATTCTTACTCCATTTCAATTTACT
+TAAAAATAATTGTCTTTAATTTCA
+TAAAATAAACCAAAACCCAAAAAT
+TAAAATAAATAAGTCCGACGACAA
+TAAAATTGTAATATTTAAATAATAT
+TAAAGAAGAAGAATTGATTTTAAT
+TAACAAATAGAACGTTCTAATTTAAA
+TAACATAAATTTTAATCATAAATTG
+TAACCTTGCAGAACTATACGATTCAAA
+TAACGGAGCACGAGAACGAAGTGG
+TAACTACTTTTACATTAATACTAA
+TAACTTTAACTTTTTTACT
+TAAGAAACTGAGCTAACGCAATGTACC
+TAAGTTTTTAATCATTTTTTTT
+TAATAAATTATTAAATAAAAAAAAAA
+TAATAAGAAACTGTTCAAACAATCCAC
+TAATAATTTAAATAAATATAAATTT
+TAATCAAAAAACTCTTCATTTTTA
+TAATTACCATTGCTAACTATCCA
+TACAAAAAATGCGAAAATTGACCCT
+TACAAAACAAACAAATTACAATCTAAA
+TACAAACGGAACTTTCGTCATAA
+TACAAACGGAACTTTCTTCATAACTTC
+TACAAACGTAATTTTCGCATAACATC
+TACAAATTTTTTTTTCTTTCTTAT
+TACAACTAACATCCTTTCTTCTTCC
+TACAAGACTAAAACAAACGTGAAGT
+TACACACTCATCAACCAAAGGACG
+TACACACTCATCAACCAAAGTACGTA
+TACACAGACTTACAAAACACATCCTTC
+TACACCTCTTTTTACTTTTTTATT
+TACAGACAACACATACGGACTTAA
+TACAGACAACACATACGGACTTAAGT
+TACAGAGAAATATACAACACTCACC
+TACCTCTTTATTAACCTCCACCTCTA
+TACTATATACTTCTTCAAATCACA
+TACTATTTTATTATACATACATACATTA
+TACTCAATAGAACTCTACTCACTCATA
+TACTGAAAACGGGCGCATATCAGTGG
+TACTGAAACAAGGAAACACAAGC
+TACTTTTTTCTTAATTTTTTATTAAAC
+TAGAACTCGAACCAGAGCTCC
+TAGACTTTCTACTCATTATTAC
+TAGCCTTTACTAGGCTTTTTCTAA
+TAGCGAGATGGACCAACGTGCTGT
+TAGGAACTTCATACCGGTCTC
+TAGGTACTTACCTTTTTTTTACACAA
+TAGTCATACATACCTAATTATACATA
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+TAGTTACCTTCATATCTCTCTTTA
+TAGTTTCACTACTTTATTCTTTTTA
+TAGTTTTAAATATTTCTTTTTTTC
+TATAATTTTATTTTATATTTTCTCT
+TATATAAATCTTCAACATCAA
+TATATTGCCTCCCCATAATCCTT
+TATCTATCTTTGATCTTCTTTTCA
+TATCTGATCAACAATCTTTTCCCAT
+TATGGAGAAACAGCGATATAAGTCA
+TATGGATTATTTCAAAATTTTTTTTT
+TATTATACATAGAATAACAAATCTTT
+TATTATCATCTCGTTCTTCCTTCTC
+TATTATTTTTTTATTCCATTCATAT
+TATTCAATCACTCCATTATATATAACA
+TATTCCGACAATACCTTCTTTAC
+TATTTATAAATTTTTTCTTGAGAC
+TATTTCACTTTATACTTCCTTAA
+TATTTCAGCAACAGACTAAGACTAA
+TATTTCCAACCTTCAACCTCAAATAA
+TATTTCTTTTTTAACTTCTTTTC
+TCAAAAAGTAATAGGGATCGTTA
+TCAAAGAACAATGTAAAGCCGCGAC
+TCAAATGCAAATTGGATTTATGA
+TCAACTGGCAAGAATTTTTGAAAATT
+TCAAGCCTTTTGAAGAACTGACCTAAA
+TCAGAAGAACAGAGAATTGATTTT
+TCAGAGTTCTACAGGTCCTACGATT
+TCAGTCTTTTTTTCTCTCCTA
+TCATTACACTTCTTACAAAAC
+TCCACAACAACTCTATCTAAAGCATT
+TCCACCTATTTATCTTTTCTT
+TCCCAACCCTCGAGCATCATTTTC
+TCCGAAAAATCGTAGGACCCGGGCA
+TCCGAAAACAAGGCCCGTCGCT
+TCCTGAGGACGAGGGGCGTTTAGC
+TCGCCGTAAAGCCAGTCGTTCTCC
+TCTAAACACCCACGAAAATCTCTTAC
+TCTAGTCTGAGCGTAGTACCAGATTG
+TCTATATTATTTTTATCAATTTTCACC
+TCTATTCATACAAAACACTAATACCC
+TCTATTTCTTTATTTTTTTTATTAT
+TCTCATCTTACAATTTTTTAAAACTT
+TCTCTTTTATTTTTATCTTTCCTT
+TCTGTATTTGACTTATTACTTTCTCC
+TCTTATTATCATTTTTTTATCCCTT
+TCTTATTTTAATCTTCCAATTTC
+TCTTCTATATAATCCTTTATTATAA
+TCTTTTTTTTAATACTTATTTTCATT
+TGAAACTGAAACTAACATACAAAATATT
+TGAAAGGAAAAACAGGACACGGGA
+TGACCAAAGACAAACAAACAATAAATA
+TGAGAATGACTTCTTCACGATCTCTT
+TGAGCGGAGAACCAGAGTTGATGAGC
+TGAGCTAGAACTGCACCCACTCCA
+TGAGGAAAACAGAAAAATGAGAGACA
+TGATGACGGGCAGCAGGGATTTTC
+TGATTTACTTACATTCTTTTTTT
+TGCTTGGACTACATATGGTTGAGGG
+TGCTTGGACTACATATGGTTGAGTG
+TGCTTGGACTACATATGGTTGAGTG
+TGCTTTTACTACATATTTTTTATTTTTTTA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGACAAGAACCACGCGACGGGTGT
+TGGACGGAGAACTGATAAGGGC
+TGGAGTAGCACAGTCGTCTGAAATC
+TGTGTAATCTTTCTACTTCTTCTAC
+TTAAACAATTTGGAATTAATT
+TTAAAGACGCAACAACTAACATT
+TTAAGTTTTAGACATAATCTATTACAA
+TTAATATGTAATTTCATACCTCAC
+TTAATGACACACGGGAAAAACACCG
+TTACAATCTACTATTCTTTTATTA
+TTACTAGATCCACCCTCATTA
+TTAGTATTAATCTTCACTTAA
+TTATAATCACGGCACCCTATACA
+TTATTACATCGTCCACATATAACAAAA
+TTATTATCTATTTTAATTTTTCTTAA
+TTCAAGTAGATTGCATTTTTTAATA
+TTCATTTTATTTTTAAATATCTTTTTT
+TTCTAGCACAACACGCACACATATA
+TTCTTCCTTTTATCCTCTCTTAA
+TTCTTGATAACGCATCTTCTACAT
+TTCTTGGACTACACATTTTTTATTGTTTTA
+TTCTTTGACTACATATTTTTTATT
+TTCTTTTACTACATATTGTTTATTTTTTTA
+TTCTTTTACTACATATTTTTTATTTTT
+TTCTTTTACTACATATTTTTTATTTTTTTA
+TTGATTCTTCTTTTTCACAAAA
+TTGTAAAACATTCTTTCTCCTGAC
+TTTAAACACTTCCTACATCAAATTTC
+TTTAAGACTTATGAGCTTG
+TTTACCAGAGGAGTCGAGTTTTT
+TTTATTACAACCCTATCTTACCTCAA
+TTTATTACTTAGTCATAATTCCAA
+TTTCCGACAAATACACCATCTTC
+TTTGATACCTTTATACCATACCTATT
+TTTGTTTTTCATTTTTTTATCTTT
+TTTGTTTTTTACTATATTT
+TTTTATATTTCCTTATATCTTTACTA
+TTTTCATTTCTTCTTCAAATCCTTT
+TTTTCTTTACCCATCTTTACTTTCCC
+TTTTCTTTTTTTCATTTTCTCTTTTA
+TTTTTAACTCATTTTACAATTAAAC
+TTTTTAACTCCCATCATTTTTCCTC
+TTTTTATCAATTTTCACCATTC
+TTTTTTATCAATTTTCACCATTCAT
+TTTTTTATCATTTTTCACCTAAAAAA
+TTTTTTTTTTGTTTTTATTTTTATCAT
b
diff -r 000000000000 -r a8aacccd79a3 test-data/output.faw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.faw Mon Sep 04 07:13:28 2017 -0400
b
b'@@ -0,0 +1,494 @@\n+>1_2\n+ACAGCAGGACGGTGATCA\n+>2_2\n+TGCTTGGACTACATATGGTTGAGTG\n+>3_2\n+TGGAATGTAAAGAAGTATGGAG\n+>4_1\n+AATTTATTTAATTTATATTCTAACTAA\n+>5_1\n+TTATAATCACGGCACCCTATACA\n+>6_1\n+TCCGAAAAATCGTAGGACCCGGGCA\n+>7_1\n+TCCCAACCCTCGAGCATCATTTTC\n+>8_1\n+TTGATTCTTCTTTTTCACAAAA\n+>9_1\n+AGAACAATTAAATAAAATAGCATA\n+>10_1\n+CCAGAAAACAATACAACATCCTCA\n+>11_1\n+TCTAGTCTGAGCGTAGTACCAGATTG\n+>12_1\n+TTTTTAACTCCCATCATTTTTCCTC\n+>13_1\n+GCAGATAGAAATCAATACAAAAATC\n+>14_1\n+AAGTGAAGAAGTAGTTTTT\n+>15_1\n+AATGTCACTTGAAGAATTCACGT\n+>16_1\n+TTTACCAGAGGAGTCGAGTTTTT\n+>17_1\n+GGTATCTTTATATTTTAATTTTCTT\n+>18_1\n+TTACTAGATCCACCCTCATTA\n+>19_1\n+TATATAAATCTTCAACATCAA\n+>20_1\n+GAAACCATTATCTTATCTTTATACA\n+>21_1\n+GGAGATTGTAGAACGAAAGGAAAAT\n+>22_1\n+ACTAAACTTTTCTTACCATATTTCTA\n+>23_1\n+TCAAGCCTTTTGAAGAACTGACCTAAA\n+>24_1\n+TAACATAAATTTTAATCATAAATTG\n+>25_1\n+TAGCGAGATGGACCAACGTGCTGT\n+>26_1\n+TACAAACGTAATTTTCGCATAACATC\n+>27_1\n+CGGAAAAGAATGTAGACCATTTAA\n+>28_1\n+GAAAGGAAGGGAAGAAAGCGAAAGGA\n+>29_1\n+CAAGAATACAAAAAATACTAATTA\n+>30_1\n+CATACCTACAAAAAAGCTTCTCTTAC\n+>31_1\n+TAAAATAAATAAGTCCGACGACAA\n+>32_1\n+AGGAATATGATGAAATAAAAAAAT\n+>33_1\n+TTTTTTTTTTGTTTTTATTTTTATCAT\n+>34_1\n+TTTTTAACTCATTTTACAATTAAAC\n+>35_1\n+CCGCGATCTGCTTATTTATAATCTT\n+>36_1\n+TAGGTACTTACCTTTTTTTTACACAA\n+>37_1\n+TCATTACACTTCTTACAAAAC\n+>38_1\n+TATTATACATAGAATAACAAATCTTT\n+>39_1\n+ATGTTATTTACTTTTTCCCCTTATA\n+>40_1\n+TCCGAAAACAAGGCCCGTCGCT\n+>41_1\n+TGCTTTTACTACATATTTTTTATTTTTTTA\n+>42_1\n+GAACAATTTTTCAATTTTTTACATTA\n+>43_1\n+AACATTTTATCAATTATACATTA\n+>44_1\n+TATCTGATCAACAATCTTTTCCCAT\n+>45_1\n+AATTGCAACAGAGACTGGAA\n+>46_1\n+TATTCAATCACTCCATTATATATAACA\n+>47_1\n+CAAACGGAACAAGACATCACCATC\n+>48_1\n+TCTTCTATATAATCCTTTATTATAA\n+>49_1\n+TGATGACGGGCAGCAGGGATTTTC\n+>50_1\n+TTTATTACAACCCTATCTTACCTCAA\n+>51_1\n+AACAGGAAAAACAGAAGGATTTCTA\n+>52_1\n+CGATATTTTCTCCTCGTACC\n+>53_1\n+AAATTGCAAAGATGGAAAATAAAACT\n+>54_1\n+TACACACTCATCAACCAAAGGACG\n+>55_1\n+TATATTGCCTCCCCATAATCCTT\n+>56_1\n+TACAAACGGAACTTTCTTCATAACTTC\n+>57_1\n+GGA
CGGAGAACTGATAATGGC\n+>58_1\n+TTCTTTGACTACATATTTTTTATT\n+>59_1\n+TCTTTTTTTTAATACTTATTTTCATT\n+>60_1\n+TACTTTTTTCTTAATTTTTTATTAAAC\n+>61_1\n+AATATAAAAATACAATCAACCATTGCA\n+>62_1\n+CAGTTTCACAAAAGATCTTTTAA\n+>63_1\n+GAAACAAACAACACATACCCTCTGGC\n+>64_1\n+AATGACACACTCTTCATCAAC\n+>65_1\n+AAGATGGAGTAGTTTTTT\n+>66_1\n+ACAACCTCAACTCATATTT\n+>67_1\n+TTAAACAATTTGGAATTAATT\n+>68_1\n+TACAAAAAATGCGAAAATTGACCCT\n+>69_1\n+GGACGGAGAACTGATAAGGGCA\n+>70_1\n+TTAAGTTTTAGACATAATCTATTACAA\n+>71_1\n+TTATTATCTATTTTAATTTTTCTTAA\n+>72_1\n+CACCGAACCGGGAAGGCGAACAAC\n+>73_1\n+TCCACCTATTTATCTTTTCTT\n+>74_1\n+TCGCCGTAAAGCCAGTCGTTCTCC\n+>75_1\n+TCAAAGAACAATGTAAAGCCGCGAC\n+>76_1\n+TGGACAAGAACCACGCGACGGGTGT\n+>77_1\n+CCCGAAAAGCCGAGGACGACTTA\n+>78_1\n+TTTTTTATCATTTTTCACCTAAAAAA\n+>79_1\n+TAGAACTCGAACCAGAGCTCC\n+>80_1\n+TCTATATTATTTTTATCAATTTTCACC\n+>81_1\n+TCTATTTCTTTATTTTTTTTATTAT\n+>82_1\n+TTTGATACCTTTATACCATACCTATT\n+>83_1\n+ATAAAGCTAGATTACCAAAGCAT\n+>84_1\n+GCCAACGACCATACCACGA\n+>85_1\n+CGGCACATGTTGAATTACACTCA\n+>86_1\n+TACTATTTTATTATACATACATACATTA\n+>87_1\n+TTAATGACACACGGGAAAAACACCG\n+>88_1\n+TAGTTTCACTACTTTATTCTTTTTA\n+>89_1\n+AACAGGGAGATCAACAGCGTTGACA\n+>90_1\n+CGATATTTTCTCCTCTGACC\n+>91_1\n+ACCAGCACCTTCCGACTCAACGTCAAA\n+>92_1\n+AAGGAATTAAAGCAATAATTCTAA\n+>93_1\n+TACAAAACAAACAAATTACAATCTAAA\n+>94_1\n+CAATTTTTAATTCCTTTTTTCTTCTT\n+>95_1\n+TACAGACAACACATACGGACTTAA\n+>96_1\n+TCTGTATTTGACTTATTACTTTCTCC\n+>97_1\n+TGAGCTAGAACTGCACCCACTCCA\n+>98_1\n+CGCCGCAAGATGAATACTCTAATGA\n+>99_1\n+TATTTCTTTTTTAACTTCTTTTC\n+>100_1\n+TTACAATCTACTATTCTTTTATTA\n+>101_1\n+TTTAAACACTTCCTACATCAAATTTC\n+>102_1\n+TGTGTAATCTTTCTACTTCTTCTAC\n+>103_1\n+TCTATTCATACAAAACACTAATACCC\n+>104_1\n+TGGAGTAGCACAGTCGTCTGAAATC\n+>105_1\n+AAGCACGCCTTACCACAATTTATAA\n+>106_1\n+CTGGAAACTATTGATCAAATT\n+>107_1\n+TACACAGACTTACAAAACACATCCTTC\n+>108_1\n+TTCAAGTAGATTGCATTTTTTAATA\n+>109_1\n+TTATTACATCGTCCACATATAACAAAA\n+>110_1\n+CAAGGCTCAGAAGAACATCACCAAGACC\n+>111_1\n+TGAGGAAAACAGAAAAATGAGAGACA\n+>112_1\n+TCAAAAAGTAATAGGGATCGTTA\n+>113_1\n+TAACTTTAACT
TTTTTACT\n+>114_1\n+TATTCCGACAATACCTTCTTTAC\n+>115_1\n+TTTGTTTTTTACTATATTT\n+>116_1\n+TTCATTTTATTTTTAAATATCTTTTTT\n+>117_1\n+TACTCAATAGAACTCTACTCACTCATA\n+>118_1\n+TGAAAGGAAAAACAGGACACGGGA\n+>119_1\n+AAAATCGACTGCCGAAAACATTTTAA\n+>120_1\n+TAC'..b'>132_1\n+CAAATAACAAACTGAATAAACGAAA\n+>133_1\n+TGAGAATGACTTCTTCACGATCTCTT\n+>134_1\n+TCTTATTATCATTTTTTTATCCCTT\n+>135_1\n+TCAAATGCAAATTGGATTTATGA\n+>136_1\n+CCTTACTCAACATACTTAATCATACTTA\n+>137_1\n+TAGACTTTCTACTCATTATTAC\n+>138_1\n+TGAAACTGAAACTAACATACAAAATATT\n+>139_1\n+AAAACCCGGACAAACCATCGGAGGA\n+>140_1\n+TACAGACAACACATACGGACTTAAGT\n+>141_1\n+TATTTCAGCAACAGACTAAGACTAA\n+>142_1\n+AACTTTAAATTTTTAATAACCTT\n+>143_1\n+TATTTATAAATTTTTTCTTGAGAC\n+>144_1\n+TTAATATGTAATTTCATACCTCAC\n+>145_1\n+CACAGACTGAGGCAGAAAAAACAA\n+>146_1\n+TAAAGAAGAAGAATTGATTTTAAT\n+>147_1\n+TACTGAAAACGGGCGCATATCAGTGG\n+>148_1\n+TCAGTCTTTTTTTCTCTCCTA\n+>149_1\n+TATAATTTTATTTTATATTTTCTCT\n+>150_1\n+NATTCTTACTCCATTTCAATTTACT\n+>151_1\n+TTGTAAAACATTCTTTCTCCTGAC\n+>152_1\n+TAATTACCATTGCTAACTATCCA\n+>153_1\n+TTCTTCCTTTTATCCTCTCTTAA\n+>154_1\n+TCTAAACACCCACGAAAATCTCTTAC\n+>155_1\n+AAAAACACACAGACACAAGCAGCAAT\n+>156_1\n+CGGACGGTATATTTTTTAATATAA\n+>157_1\n+TATGGAGAAACAGCGATATAAGTCA\n+>158_1\n+TACAACTAACATCCTTTCTTCTTCC\n+>159_1\n+AACTCTCTAATTTAACTTTGTGC\n+>160_1\n+TCCTGAGGACGAGGGGCGTTTAGC\n+>161_1\n+TATTTCCAACCTTCAACCTCAAATAA\n+>162_1\n+TGGACGGAGAACTGATAAGGGC\n+>163_1\n+TTTAAGACTTATGAGCTTG\n+>164_1\n+TTAAAGACGCAACAACTAACATT\n+>165_1\n+TAGGAACTTCATACCGGTCTC\n+>166_1\n+CGATATTTTCTCCTCTTACC\n+>167_1\n+GAGGATTAAAAGAACGGTTTATAA\n+>168_1\n+GAATGATCGCACCACCACCTCAACGTT\n+>169_1\n+TTTTCTTTACCCATCTTTACTTTCCC\n+>170_1\n+AAGACAACAATGACATATAAGACG\n+>171_1\n+TAATAATTTAAATAAATATAAATTT\n+>172_1\n+TACTGAAACAAGGAAACACAAGC\n+>173_1\n+TCAGAAGAACAGAGAATTGATTTT\n+>174_1\n+CATACCTTAAATTATCTCTTTCTT\n+>175_1\n+TTCTTTTACTACATATTTTTTATTTTT\n+>176_1\n+AAAAAATATCTTTTTTAACTCGTGGCC\n+>177_1\n+TAACAAATAGAACGTTCTAATTTAAA\n+>178_1\n+TAGTTACCTTCATATCTCTCTTTA\n+>179_1\n+TAAAATTGTAATATTTAAATAATAT\n+>180_1\n+
AAAAGGAAAAACAGAAAAATTGGG\n+>181_1\n+AGATGTTGATCTAAACTCTCCCA\n+>182_1\n+TACCTCTTTATTAACCTCCACCTCTA\n+>183_1\n+TTTCCGACAAATACACCATCTTC\n+>184_1\n+ACAAATCATAAATTTTTTTTTACT\n+>185_1\n+GACGAAACGCAACAACAAAATGGACG\n+>186_1\n+TACAAATTTTTTTTTCTTTCTTAT\n+>187_1\n+TACACCTCTTTTTACTTTTTTATT\n+>188_1\n+TATGGATTATTTCAAAATTTTTTTTT\n+>189_1\n+TTCTAGCACAACACGCACACATATA\n+>190_1\n+TAACTACTTTTACATTAATACTAA\n+>191_1\n+TCTCATCTTACAATTTTTTAAAACTT\n+>192_1\n+TTCTTGGACTACACATTTTTTATTGTTTTA\n+>193_1\n+TACACACTCATCAACCAAAGTACGTA\n+>194_1\n+TACTATATACTTCTTCAAATCACA\n+>195_1\n+TCAGAGTTCTACAGGTCCTACGATT\n+>196_1\n+TGATTTACTTACATTCTTTTTTT\n+>197_1\n+CCATATATGACTGACTCATTTCAC\n+>198_1\n+GAAGAGGAGGAGGAGTTTGTAAG\n+>199_1\n+AAAGACAAAAGAAATACAGGCACT\n+>200_1\n+TACAAGACTAAAACAAACGTGAAGT\n+>201_1\n+TAACGGAGCACGAGAACGAAGTGG\n+>202_1\n+CTTCTTTTACTACATATTTTTTATTTTTTTA\n+>203_1\n+TAATAAGAAACTGTTCAAACAATCCAC\n+>204_1\n+TGAGCGGAGAACCAGAGTTGATGAGC\n+>205_1\n+TATTATTTTTTTATTCCATTCATAT\n+>206_1\n+TTTATTACTTAGTCATAATTCCAA\n+>207_1\n+TTTTATATTTCCTTATATCTTTACTA\n+>208_1\n+AACGGGGAATAAGGGTTCG\n+>209_1\n+AATCTACAATTTCCATTACGACTCC\n+>210_1\n+CCGACCGAGCAAATAAACACAGGAACG\n+>211_1\n+TCCACAACAACTCTATCTAAAGCATT\n+>212_1\n+TTCTTGATAACGCATCTTCTACAT\n+>213_1\n+TGCTTGGACTACATATGGTTGAGGG\n+>214_1\n+CAGATTCACTGATTTTCTTACGCC\n+>215_1\n+TTTGTTTTTCATTTTTTTATCTTT\n+>216_1\n+CTATATTTTCTCTCTTACC\n+>217_1\n+TAACCTTGCAGAACTATACGATTCAAA\n+>218_1\n+TAAGAAACTGAGCTAACGCAATGTACC\n+>219_1\n+TTCTTTTACTACATATTTTTTATTTTTTTA\n+>220_1\n+TATCTATCTTTGATCTTCTTTTCA\n+>221_1\n+TAATAAATTATTAAATAAAAAAAAAA\n+>222_1\n+TTTTTTATCAATTTTCACCATTCAT\n+>223_1\n+TATTTCACTTTATACTTCCTTAA\n+>224_1\n+TAGTTTTAAATATTTCTTTTTTTC\n+>225_1\n+TTCTTTTACTACATATTGTTTATTTTTTTA\n+>226_1\n+GAGAATAAATATTTCAATGGTCTATTG\n+>227_1\n+CGATATTTTCTCCTCTTACCT\n+>228_1\n+CACGACTTTATTCTTTTTATCTCA\n+>229_1\n+TAGTGGACTTTAAAAAAAAAAAAAAAAAA\n+>230_1\n+CATAATATAAACTTATCTT\n+>231_1\n+ATGAAATTCGAACAATACGTC\n+>232_1\n+AACAACTGCAAACATCTACCACA\n+>233_1\n+TAAAAATAATTGTCTTTAATTTCA\n+>234_1\n+CGCAACCAGCAGC
AACTCCTAGCAT\n+>235_1\n+ATTATTAATAAATTATTATAA\n+>236_1\n+CATTAATTCATCCATTTAAACTAA\n+>237_1\n+TCTTATTTTAATCTTCCAATTTC\n+>238_1\n+CTAGACAAGATGCTATAAATTTTAAA\n+>239_1\n+TGACCAAAGACAAACAAACAATAAATA\n+>240_1\n+TTTTTATCAATTTTCACCATTC\n+>241_1\n+TAAGTTTTTAATCATTTTTTTT\n+>242_1\n+TAATCAAAAAACTCTTCATTTTTA\n+>243_1\n+TACAAACGGAACTTTCGTCATAA\n+>244_1\n+TTTTCTTTTTTTCATTTTCTCTTTTA\n+>245_1\n+TAGCCTTTACTAGGCTTTTTCTAA\n+>246_1\n+TTAGTATTAATCTTCACTTAA\n+>247_1\n+TAAAATAAACCAAAACCCAAAAAT\n'
b
diff -r 000000000000 -r a8aacccd79a3 test-data/output.sorted.faw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.sorted.faw Mon Sep 04 07:13:28 2017 -0400
b
b'@@ -0,0 +1,494 @@\n+>100_1\n+>101_1\n+>102_1\n+>103_1\n+>104_1\n+>105_1\n+>106_1\n+>107_1\n+>108_1\n+>109_1\n+>10_1\n+>110_1\n+>111_1\n+>112_1\n+>113_1\n+>114_1\n+>115_1\n+>116_1\n+>117_1\n+>118_1\n+>119_1\n+>11_1\n+>120_1\n+>121_1\n+>122_1\n+>123_1\n+>124_1\n+>125_1\n+>126_1\n+>127_1\n+>128_1\n+>129_1\n+>12_1\n+>130_1\n+>131_1\n+>132_1\n+>133_1\n+>134_1\n+>135_1\n+>136_1\n+>137_1\n+>138_1\n+>139_1\n+>13_1\n+>140_1\n+>141_1\n+>142_1\n+>143_1\n+>144_1\n+>145_1\n+>146_1\n+>147_1\n+>148_1\n+>149_1\n+>14_1\n+>150_1\n+>151_1\n+>152_1\n+>153_1\n+>154_1\n+>155_1\n+>156_1\n+>157_1\n+>158_1\n+>159_1\n+>15_1\n+>160_1\n+>161_1\n+>162_1\n+>163_1\n+>164_1\n+>165_1\n+>166_1\n+>167_1\n+>168_1\n+>169_1\n+>16_1\n+>170_1\n+>171_1\n+>172_1\n+>173_1\n+>174_1\n+>175_1\n+>176_1\n+>177_1\n+>178_1\n+>179_1\n+>17_1\n+>180_1\n+>181_1\n+>182_1\n+>183_1\n+>184_1\n+>185_1\n+>186_1\n+>187_1\n+>188_1\n+>189_1\n+>18_1\n+>190_1\n+>191_1\n+>192_1\n+>193_1\n+>194_1\n+>195_1\n+>196_1\n+>197_1\n+>198_1\n+>199_1\n+>19_1\n+>1_2\n+>200_1\n+>201_1\n+>202_1\n+>203_1\n+>204_1\n+>205_1\n+>206_1\n+>207_1\n+>208_1\n+>209_1\n+>20_1\n+>210_1\n+>211_1\n+>212_1\n+>213_1\n+>214_1\n+>215_1\n+>216_1\n+>217_1\n+>218_1\n+>219_1\n+>21_1\n+>220_1\n+>221_1\n+>222_1\n+>223_1\n+>224_1\n+>225_1\n+>226_1\n+>227_1\n+>228_1\n+>229_1\n+>22_1\n+>230_1\n+>231_1\n+>232_1\n+>233_1\n+>234_1\n+>235_1\n+>236_1\n+>237_1\n+>238_1\n+>239_1\n+>23_1\n+>240_1\n+>241_1\n+>242_1\n+>243_1\n+>244_1\n+>245_1\n+>246_1\n+>247_1\n+>24_1\n+>25_1\n+>26_1\n+>27_1\n+>28_1\n+>29_1\n+>2_2\n+>30_1\n+>31_1\n+>32_1\n+>33_1\n+>34_1\n+>35_1\n+>36_1\n+>37_1\n+>38_1\n+>39_1\n+>3_2\n+>40_1\n+>41_1\n+>42_1\n+>43_1\n+>44_1\n+>45_1\n+>46_1\n+>47_1\n+>48_1\n+>49_1\n+>4_1\n+>50_1\n+>51_1\n+>52_1\n+>53_1\n+>54_1\n+>55_1\n+>56_1\n+>57_1\n+>58_1\n+>59_1\n+>5_1\n+>60_1\n+>61_1\n+>62_1\n+>63_1\n+>64_1\n+>65_1\n+>66_1\n+>67_1\n+>68_1\n+>69_1\n+>6_1\n+>70_1\n+>71_1\n+>72_1\n+>73_1\n+>74_1\n+>75_1\n+>76_1\n+>77_1\n+>78_1\n+>79_1\n+>7_1\n+>80_1\n+>81_1\n+>82_1\n+>83_1\n+>84_1
\n+>85_1\n+>86_1\n+>87_1\n+>88_1\n+>89_1\n+>8_1\n+>90_1\n+>91_1\n+>92_1\n+>93_1\n+>94_1\n+>95_1\n+>96_1\n+>97_1\n+>98_1\n+>99_1\n+>9_1\n+AAAAAATATCTTTTTTAACTCGTGGCC\n+AAAAACACACAGACACAAGCAGCAAT\n+AAAACCCGGACAAACCATCGGAGGA\n+AAAAGGAAAAACAGAAAAATTGGG\n+AAAATCGACTGCCGAAAACATTTTAA\n+AAAGACAAAAGAAATACAGGCACT\n+AAATTGCAAAGATGGAAAATAAAACT\n+AACAACTGCAAACATCTACCACA\n+AACAGGAAAAACAGAAGGATTTCTA\n+AACAGGGAGATCAACAGCGTTGACA\n+AACATTTTATCAATTATACATTA\n+AACGGGGAATAAGGGTTCG\n+AACTCTCTAATTTAACTTTGTGC\n+AACTTTAAATTTTTAATAACCTT\n+AAGACAACAATGACATATAAGACG\n+AAGATGGAGTAGTTTTTT\n+AAGCACGCCTTACCACAATTTATAA\n+AAGGAATTAAAGCAATAATTCTAA\n+AAGTGAAGAAGTAGTTTTT\n+AATATAAAAATACAATCAACCATTGCA\n+AATCTACAATTTCCATTACGACTCC\n+AATGACACACTCTTCATCAAC\n+AATGTCACTTGAAGAATTCACGT\n+AATTGCAACAGAGACTGGAA\n+AATTTATTTAATTTATATTCTAACTAA\n+ACAAATCATAAATTTTTTTTTACT\n+ACAACCTCAACTCATATTT\n+ACAGCAGGACGGTGATCA\n+ACCAGCACCTTCCGACTCAACGTCAAA\n+ACTAAACTTTTCTTACCATATTTCTA\n+AGAACAATTAAATAAAATAGCATA\n+AGATGTTGATCTAAACTCTCCCA\n+AGGAATATGATGAAATAAAAAAAT\n+AGGATTTTTAAGCCCATATGTTTCC\n+ATAAAGCTAGATTACCAAAGCAT\n+ATGAAATTCGAACAATACGTC\n+ATGTTATTTACTTTTTCCCCTTATA\n+ATTATTAATAAATTATTATAA\n+ATTTTACTTCATCATTTTC\n+CAAACGGAACAAGACATCACCATC\n+CAAATAACAAACTGAATAAACGAAA\n+CAACACATGACGCGACAATTCTTG\n+CAAGAATACAAAAAATACTAATTA\n+CAAGATATGAACAAAGCAAAGACAC\n+CAAGGCTCAGAAGAACATCACCAAGACC\n+CAATTTTTAATTCCTTTTTTCTTCTT\n+CACAGACTGAGGCAGAAAAAACAA\n+CACCGAACCGGGAAGGCGAACAAC\n+CACGACTTTATTCTTTTTATCTCA\n+CAGATTCACTGATTTTCTTACGCC\n+CAGTTTCACAAAAGATCTTTTAA\n+CATAATATAAACTTATCTT\n+CATACCTACAAAAAAGCTTCTCTTAC\n+CATACCTTAAATTATCTCTTTCTT\n+CATTAATTCATCCATTTAAACTAA\n+CCAGAAAACAATACAACATCCTCA\n+CCATATATGACTGACTCATTTCAC\n+CCCGAAAAGCCGAGGACGACTTA\n+CCGACCGAGCAAATAAACACAGGAACG\n+CCGCGATCTGCTTATTTATAATCTT\n+CCTTACTCAACATACTTAATCATACTTA\n+CGATATTTTCTCCTCGTACC\n+CGATATTTTCTCCTCTGACC\n+CGATATTTTCTCCTCTTACC\n+CGATATTTTCTCCTCTTACCT\n+CGCAACCAGCAGCAACTCCTAGCAT\n+CGCCGCAAGATGAATACTCTAATGA\n+CGGAAAAGAATGTAGACCATTTAA\n+CGGACGGTATATTTTTTAATATAA\n+CGGCACATGTTGAATTACACTCA\n+C
TAGACAAGATGCTATAAATTTTAAA\n+CTATATTTTCTCTCTTACC\n+CTGGAAACTATTGATCAAATT\n+CTTCTTTTACTACATATTTTTTATTTTTTTA\n+GAAACAAACAACACATACCCTCTGGC\n+GAAACCATTATCTTATCTTTATACA\n+GAAAGGAAGGGAAGAAAGCGAAAGGA\n+GAACAATTTTTCAATTTTTTACATTA\n+GAAGAGGAGGAGGAGTTTGTAAG\n+GAATGATCGCACCACCACCTCAACGTT\n+GACGAAACGCAACAACAAAATGGACG\n+GAGAACTTTTAATCATTTTAC\n'..b'GAACGTTCTAATTTAAA\n+TAACATAAATTTTAATCATAAATTG\n+TAACCTTGCAGAACTATACGATTCAAA\n+TAACGGAGCACGAGAACGAAGTGG\n+TAACTACTTTTACATTAATACTAA\n+TAACTTTAACTTTTTTACT\n+TAAGAAACTGAGCTAACGCAATGTACC\n+TAAGTTTTTAATCATTTTTTTT\n+TAATAAATTATTAAATAAAAAAAAAA\n+TAATAAGAAACTGTTCAAACAATCCAC\n+TAATAATTTAAATAAATATAAATTT\n+TAATCAAAAAACTCTTCATTTTTA\n+TAATTACCATTGCTAACTATCCA\n+TACAAAAAATGCGAAAATTGACCCT\n+TACAAAACAAACAAATTACAATCTAAA\n+TACAAACGGAACTTTCGTCATAA\n+TACAAACGGAACTTTCTTCATAACTTC\n+TACAAACGTAATTTTCGCATAACATC\n+TACAAATTTTTTTTTCTTTCTTAT\n+TACAACTAACATCCTTTCTTCTTCC\n+TACAAGACTAAAACAAACGTGAAGT\n+TACACACTCATCAACCAAAGGACG\n+TACACACTCATCAACCAAAGTACGTA\n+TACACAGACTTACAAAACACATCCTTC\n+TACACCTCTTTTTACTTTTTTATT\n+TACAGACAACACATACGGACTTAA\n+TACAGACAACACATACGGACTTAAGT\n+TACAGAGAAATATACAACACTCACC\n+TACCTCTTTATTAACCTCCACCTCTA\n+TACTATATACTTCTTCAAATCACA\n+TACTATTTTATTATACATACATACATTA\n+TACTCAATAGAACTCTACTCACTCATA\n+TACTGAAAACGGGCGCATATCAGTGG\n+TACTGAAACAAGGAAACACAAGC\n+TACTTTTTTCTTAATTTTTTATTAAAC\n+TAGAACTCGAACCAGAGCTCC\n+TAGACTTTCTACTCATTATTAC\n+TAGCCTTTACTAGGCTTTTTCTAA\n+TAGCGAGATGGACCAACGTGCTGT\n+TAGGAACTTCATACCGGTCTC\n+TAGGTACTTACCTTTTTTTTACACAA\n+TAGTCATACATACCTAATTATACATA\n+TAGTGGACTTTAAAAAAAAAAAAAAAAAA\n+TAGTTACCTTCATATCTCTCTTTA\n+TAGTTTCACTACTTTATTCTTTTTA\n+TAGTTTTAAATATTTCTTTTTTTC\n+TATAATTTTATTTTATATTTTCTCT\n+TATATAAATCTTCAACATCAA\n+TATATTGCCTCCCCATAATCCTT\n+TATCTATCTTTGATCTTCTTTTCA\n+TATCTGATCAACAATCTTTTCCCAT\n+TATGGAGAAACAGCGATATAAGTCA\n+TATGGATTATTTCAAAATTTTTTTTT\n+TATTATACATAGAATAACAAATCTTT\n+TATTATCATCTCGTTCTTCCTTCTC\n+TATTATTTTTTTATTCCATTCATAT\n+TATTCAATCACTCCATTATATATAACA\n+TATTCCGACAATACCTTCTTTAC\n+TATTTATAAATTTTTTCTTGAGAC\n+TATTTCACTTTATACTTCCTTAA\n+TATTTCA
GCAACAGACTAAGACTAA\n+TATTTCCAACCTTCAACCTCAAATAA\n+TATTTCTTTTTTAACTTCTTTTC\n+TCAAAAAGTAATAGGGATCGTTA\n+TCAAAGAACAATGTAAAGCCGCGAC\n+TCAAATGCAAATTGGATTTATGA\n+TCAACTGGCAAGAATTTTTGAAAATT\n+TCAAGCCTTTTGAAGAACTGACCTAAA\n+TCAGAAGAACAGAGAATTGATTTT\n+TCAGAGTTCTACAGGTCCTACGATT\n+TCAGTCTTTTTTTCTCTCCTA\n+TCATTACACTTCTTACAAAAC\n+TCCACAACAACTCTATCTAAAGCATT\n+TCCACCTATTTATCTTTTCTT\n+TCCCAACCCTCGAGCATCATTTTC\n+TCCGAAAAATCGTAGGACCCGGGCA\n+TCCGAAAACAAGGCCCGTCGCT\n+TCCTGAGGACGAGGGGCGTTTAGC\n+TCGCCGTAAAGCCAGTCGTTCTCC\n+TCTAAACACCCACGAAAATCTCTTAC\n+TCTAGTCTGAGCGTAGTACCAGATTG\n+TCTATATTATTTTTATCAATTTTCACC\n+TCTATTCATACAAAACACTAATACCC\n+TCTATTTCTTTATTTTTTTTATTAT\n+TCTCATCTTACAATTTTTTAAAACTT\n+TCTCTTTTATTTTTATCTTTCCTT\n+TCTGTATTTGACTTATTACTTTCTCC\n+TCTTATTATCATTTTTTTATCCCTT\n+TCTTATTTTAATCTTCCAATTTC\n+TCTTCTATATAATCCTTTATTATAA\n+TCTTTTTTTTAATACTTATTTTCATT\n+TGAAACTGAAACTAACATACAAAATATT\n+TGAAAGGAAAAACAGGACACGGGA\n+TGACCAAAGACAAACAAACAATAAATA\n+TGAGAATGACTTCTTCACGATCTCTT\n+TGAGCGGAGAACCAGAGTTGATGAGC\n+TGAGCTAGAACTGCACCCACTCCA\n+TGAGGAAAACAGAAAAATGAGAGACA\n+TGATGACGGGCAGCAGGGATTTTC\n+TGATTTACTTACATTCTTTTTTT\n+TGCTTGGACTACATATGGTTGAGGG\n+TGCTTGGACTACATATGGTTGAGTG\n+TGCTTTTACTACATATTTTTTATTTTTTTA\n+TGGAATGTAAAGAAGTATGGAG\n+TGGACAAGAACCACGCGACGGGTGT\n+TGGACGGAGAACTGATAAGGGC\n+TGGAGTAGCACAGTCGTCTGAAATC\n+TGTGTAATCTTTCTACTTCTTCTAC\n+TTAAACAATTTGGAATTAATT\n+TTAAAGACGCAACAACTAACATT\n+TTAAGTTTTAGACATAATCTATTACAA\n+TTAATATGTAATTTCATACCTCAC\n+TTAATGACACACGGGAAAAACACCG\n+TTACAATCTACTATTCTTTTATTA\n+TTACTAGATCCACCCTCATTA\n+TTAGTATTAATCTTCACTTAA\n+TTATAATCACGGCACCCTATACA\n+TTATTACATCGTCCACATATAACAAAA\n+TTATTATCTATTTTAATTTTTCTTAA\n+TTCAAGTAGATTGCATTTTTTAATA\n+TTCATTTTATTTTTAAATATCTTTTTT\n+TTCTAGCACAACACGCACACATATA\n+TTCTTCCTTTTATCCTCTCTTAA\n+TTCTTGATAACGCATCTTCTACAT\n+TTCTTGGACTACACATTTTTTATTGTTTTA\n+TTCTTTGACTACATATTTTTTATT\n+TTCTTTTACTACATATTGTTTATTTTTTTA\n+TTCTTTTACTACATATTTTTTATTTTT\n+TTCTTTTACTACATATTTTTTATTTTTTTA\n+TTGATTCTTCTTTTTCACAAAA\n+TTGTAAAACATTCTTTCTCCTGAC\n+TTTAAACACTTCCTACATCAAATTTC\n+TTTAA
GACTTATGAGCTTG\n+TTTACCAGAGGAGTCGAGTTTTT\n+TTTATTACAACCCTATCTTACCTCAA\n+TTTATTACTTAGTCATAATTCCAA\n+TTTCCGACAAATACACCATCTTC\n+TTTGATACCTTTATACCATACCTATT\n+TTTGTTTTTCATTTTTTTATCTTT\n+TTTGTTTTTTACTATATTT\n+TTTTATATTTCCTTATATCTTTACTA\n+TTTTCATTTCTTCTTCAAATCCTTT\n+TTTTCTTTACCCATCTTTACTTTCCC\n+TTTTCTTTTTTTCATTTTCTCTTTTA\n+TTTTTAACTCATTTTACAATTAAAC\n+TTTTTAACTCCCATCATTTTTCCTC\n+TTTTTATCAATTTTCACCATTC\n+TTTTTTATCAATTTTCACCATTCAT\n+TTTTTTATCATTTTTCACCTAAAAAA\n+TTTTTTTTTTGTTTTTATTTTTATCAT\n'
b
diff -r 000000000000 -r a8aacccd79a3 test-data/output.sorted.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.sorted.tab Mon Sep 04 07:13:28 2017 -0400
b
@@ -0,0 +1,247 @@
+AAAAAATATCTTTTTTAACTCGTGGCC 1
+AAAAACACACAGACACAAGCAGCAAT 1
+AAAACCCGGACAAACCATCGGAGGA 1
+AAAAGGAAAAACAGAAAAATTGGG 1
+AAAATCGACTGCCGAAAACATTTTAA 1
+AAAGACAAAAGAAATACAGGCACT 1
+AAATTGCAAAGATGGAAAATAAAACT 1
+AACAACTGCAAACATCTACCACA 1
+AACAGGAAAAACAGAAGGATTTCTA 1
+AACAGGGAGATCAACAGCGTTGACA 1
+AACATTTTATCAATTATACATTA 1
+AACGGGGAATAAGGGTTCG 1
+AACTCTCTAATTTAACTTTGTGC 1
+AACTTTAAATTTTTAATAACCTT 1
+AAGACAACAATGACATATAAGACG 1
+AAGATGGAGTAGTTTTTT 1
+AAGCACGCCTTACCACAATTTATAA 1
+AAGGAATTAAAGCAATAATTCTAA 1
+AAGTGAAGAAGTAGTTTTT 1
+AATATAAAAATACAATCAACCATTGCA 1
+AATCTACAATTTCCATTACGACTCC 1
+AATGACACACTCTTCATCAAC 1
+AATGTCACTTGAAGAATTCACGT 1
+AATTGCAACAGAGACTGGAA 1
+AATTTATTTAATTTATATTCTAACTAA 1
+ACAAATCATAAATTTTTTTTTACT 1
+ACAACCTCAACTCATATTT 1
+ACAGCAGGACGGTGATCA 2
+ACCAGCACCTTCCGACTCAACGTCAAA 1
+ACTAAACTTTTCTTACCATATTTCTA 1
+AGAACAATTAAATAAAATAGCATA 1
+AGATGTTGATCTAAACTCTCCCA 1
+AGGAATATGATGAAATAAAAAAAT 1
+AGGATTTTTAAGCCCATATGTTTCC 1
+ATAAAGCTAGATTACCAAAGCAT 1
+ATGAAATTCGAACAATACGTC 1
+ATGTTATTTACTTTTTCCCCTTATA 1
+ATTATTAATAAATTATTATAA 1
+ATTTTACTTCATCATTTTC 1
+CAAACGGAACAAGACATCACCATC 1
+CAAATAACAAACTGAATAAACGAAA 1
+CAACACATGACGCGACAATTCTTG 1
+CAAGAATACAAAAAATACTAATTA 1
+CAAGATATGAACAAAGCAAAGACAC 1
+CAAGGCTCAGAAGAACATCACCAAGACC 1
+CAATTTTTAATTCCTTTTTTCTTCTT 1
+CACAGACTGAGGCAGAAAAAACAA 1
+CACCGAACCGGGAAGGCGAACAAC 1
+CACGACTTTATTCTTTTTATCTCA 1
+CAGATTCACTGATTTTCTTACGCC 1
+CAGTTTCACAAAAGATCTTTTAA 1
+CATAATATAAACTTATCTT 1
+CATACCTACAAAAAAGCTTCTCTTAC 1
+CATACCTTAAATTATCTCTTTCTT 1
+CATTAATTCATCCATTTAAACTAA 1
+CCAGAAAACAATACAACATCCTCA 1
+CCATATATGACTGACTCATTTCAC 1
+CCCGAAAAGCCGAGGACGACTTA 1
+CCGACCGAGCAAATAAACACAGGAACG 1
+CCGCGATCTGCTTATTTATAATCTT 1
+CCTTACTCAACATACTTAATCATACTTA 1
+CGATATTTTCTCCTCGTACC 1
+CGATATTTTCTCCTCTGACC 1
+CGATATTTTCTCCTCTTACC 1
+CGATATTTTCTCCTCTTACCT 1
+CGCAACCAGCAGCAACTCCTAGCAT 1
+CGCCGCAAGATGAATACTCTAATGA 1
+CGGAAAAGAATGTAGACCATTTAA 1
+CGGACGGTATATTTTTTAATATAA 1
+CGGCACATGTTGAATTACACTCA 1
+CTAGACAAGATGCTATAAATTTTAAA 1
+CTATATTTTCTCTCTTACC 1
+CTGGAAACTATTGATCAAATT 1
+CTTCTTTTACTACATATTTTTTATTTTTTTA 1
+GAAACAAACAACACATACCCTCTGGC 1
+GAAACCATTATCTTATCTTTATACA 1
+GAAAGGAAGGGAAGAAAGCGAAAGGA 1
+GAACAATTTTTCAATTTTTTACATTA 1
+GAAGAGGAGGAGGAGTTTGTAAG 1
+GAATGATCGCACCACCACCTCAACGTT 1
+GACGAAACGCAACAACAAAATGGACG 1
+GAGAACTTTTAATCATTTTAC 1
+GAGAATAAATATTTCAATGGTCTATTG 1
+GAGGATTAAAAGAACGGTTTATAA 1
+GCAGATAGAAATCAATACAAAAATC 1
+GCCAACGACCATACCACGA 1
+GCCGGGGCGTGAGATGTCTGCATTA 1
+GGACGGAGAACTGATAAGGGCA 1
+GGACGGAGAACTGATAATGGC 1
+GGAGATTGTAGAACGAAAGGAAAAT 1
+GGTATCTTTATATTTTAATTTTCTT 1
+NATTCTTACTCCATTTCAATTTACT 1
+TAAAAATAATTGTCTTTAATTTCA 1
+TAAAATAAACCAAAACCCAAAAAT 1
+TAAAATAAATAAGTCCGACGACAA 1
+TAAAATTGTAATATTTAAATAATAT 1
+TAAAGAAGAAGAATTGATTTTAAT 1
+TAACAAATAGAACGTTCTAATTTAAA 1
+TAACATAAATTTTAATCATAAATTG 1
+TAACCTTGCAGAACTATACGATTCAAA 1
+TAACGGAGCACGAGAACGAAGTGG 1
+TAACTACTTTTACATTAATACTAA 1
+TAACTTTAACTTTTTTACT 1
+TAAGAAACTGAGCTAACGCAATGTACC 1
+TAAGTTTTTAATCATTTTTTTT 1
+TAATAAATTATTAAATAAAAAAAAAA 1
+TAATAAGAAACTGTTCAAACAATCCAC 1
+TAATAATTTAAATAAATATAAATTT 1
+TAATCAAAAAACTCTTCATTTTTA 1
+TAATTACCATTGCTAACTATCCA 1
+TACAAAAAATGCGAAAATTGACCCT 1
+TACAAAACAAACAAATTACAATCTAAA 1
+TACAAACGGAACTTTCGTCATAA 1
+TACAAACGGAACTTTCTTCATAACTTC 1
+TACAAACGTAATTTTCGCATAACATC 1
+TACAAATTTTTTTTTCTTTCTTAT 1
+TACAACTAACATCCTTTCTTCTTCC 1
+TACAAGACTAAAACAAACGTGAAGT 1
+TACACACTCATCAACCAAAGGACG 1
+TACACACTCATCAACCAAAGTACGTA 1
+TACACAGACTTACAAAACACATCCTTC 1
+TACACCTCTTTTTACTTTTTTATT 1
+TACAGACAACACATACGGACTTAA 1
+TACAGACAACACATACGGACTTAAGT 1
+TACAGAGAAATATACAACACTCACC 1
+TACCTCTTTATTAACCTCCACCTCTA 1
+TACTATATACTTCTTCAAATCACA 1
+TACTATTTTATTATACATACATACATTA 1
+TACTCAATAGAACTCTACTCACTCATA 1
+TACTGAAAACGGGCGCATATCAGTGG 1
+TACTGAAACAAGGAAACACAAGC 1
+TACTTTTTTCTTAATTTTTTATTAAAC 1
+TAGAACTCGAACCAGAGCTCC 1
+TAGACTTTCTACTCATTATTAC 1
+TAGCCTTTACTAGGCTTTTTCTAA 1
+TAGCGAGATGGACCAACGTGCTGT 1
+TAGGAACTTCATACCGGTCTC 1
+TAGGTACTTACCTTTTTTTTACACAA 1
+TAGTCATACATACCTAATTATACATA 1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA 1
+TAGTTACCTTCATATCTCTCTTTA 1
+TAGTTTCACTACTTTATTCTTTTTA 1
+TAGTTTTAAATATTTCTTTTTTTC 1
+TATAATTTTATTTTATATTTTCTCT 1
+TATATAAATCTTCAACATCAA 1
+TATATTGCCTCCCCATAATCCTT 1
+TATCTATCTTTGATCTTCTTTTCA 1
+TATCTGATCAACAATCTTTTCCCAT 1
+TATGGAGAAACAGCGATATAAGTCA 1
+TATGGATTATTTCAAAATTTTTTTTT 1
+TATTATACATAGAATAACAAATCTTT 1
+TATTATCATCTCGTTCTTCCTTCTC 1
+TATTATTTTTTTATTCCATTCATAT 1
+TATTCAATCACTCCATTATATATAACA 1
+TATTCCGACAATACCTTCTTTAC 1
+TATTTATAAATTTTTTCTTGAGAC 1
+TATTTCACTTTATACTTCCTTAA 1
+TATTTCAGCAACAGACTAAGACTAA 1
+TATTTCCAACCTTCAACCTCAAATAA 1
+TATTTCTTTTTTAACTTCTTTTC 1
+TCAAAAAGTAATAGGGATCGTTA 1
+TCAAAGAACAATGTAAAGCCGCGAC 1
+TCAAATGCAAATTGGATTTATGA 1
+TCAACTGGCAAGAATTTTTGAAAATT 1
+TCAAGCCTTTTGAAGAACTGACCTAAA 1
+TCAGAAGAACAGAGAATTGATTTT 1
+TCAGAGTTCTACAGGTCCTACGATT 1
+TCAGTCTTTTTTTCTCTCCTA 1
+TCATTACACTTCTTACAAAAC 1
+TCCACAACAACTCTATCTAAAGCATT 1
+TCCACCTATTTATCTTTTCTT 1
+TCCCAACCCTCGAGCATCATTTTC 1
+TCCGAAAAATCGTAGGACCCGGGCA 1
+TCCGAAAACAAGGCCCGTCGCT 1
+TCCTGAGGACGAGGGGCGTTTAGC 1
+TCGCCGTAAAGCCAGTCGTTCTCC 1
+TCTAAACACCCACGAAAATCTCTTAC 1
+TCTAGTCTGAGCGTAGTACCAGATTG 1
+TCTATATTATTTTTATCAATTTTCACC 1
+TCTATTCATACAAAACACTAATACCC 1
+TCTATTTCTTTATTTTTTTTATTAT 1
+TCTCATCTTACAATTTTTTAAAACTT 1
+TCTCTTTTATTTTTATCTTTCCTT 1
+TCTGTATTTGACTTATTACTTTCTCC 1
+TCTTATTATCATTTTTTTATCCCTT 1
+TCTTATTTTAATCTTCCAATTTC 1
+TCTTCTATATAATCCTTTATTATAA 1
+TCTTTTTTTTAATACTTATTTTCATT 1
+TGAAACTGAAACTAACATACAAAATATT 1
+TGAAAGGAAAAACAGGACACGGGA 1
+TGACCAAAGACAAACAAACAATAAATA 1
+TGAGAATGACTTCTTCACGATCTCTT 1
+TGAGCGGAGAACCAGAGTTGATGAGC 1
+TGAGCTAGAACTGCACCCACTCCA 1
+TGAGGAAAACAGAAAAATGAGAGACA 1
+TGATGACGGGCAGCAGGGATTTTC 1
+TGATTTACTTACATTCTTTTTTT 1
+TGCTTGGACTACATATGGTTGAGGG 1
+TGCTTGGACTACATATGGTTGAGTG 2
+TGCTTTTACTACATATTTTTTATTTTTTTA 1
+TGGAATGTAAAGAAGTATGGAG 2
+TGGACAAGAACCACGCGACGGGTGT 1
+TGGACGGAGAACTGATAAGGGC 1
+TGGAGTAGCACAGTCGTCTGAAATC 1
+TGTGTAATCTTTCTACTTCTTCTAC 1
+TTAAACAATTTGGAATTAATT 1
+TTAAAGACGCAACAACTAACATT 1
+TTAAGTTTTAGACATAATCTATTACAA 1
+TTAATATGTAATTTCATACCTCAC 1
+TTAATGACACACGGGAAAAACACCG 1
+TTACAATCTACTATTCTTTTATTA 1
+TTACTAGATCCACCCTCATTA 1
+TTAGTATTAATCTTCACTTAA 1
+TTATAATCACGGCACCCTATACA 1
+TTATTACATCGTCCACATATAACAAAA 1
+TTATTATCTATTTTAATTTTTCTTAA 1
+TTCAAGTAGATTGCATTTTTTAATA 1
+TTCATTTTATTTTTAAATATCTTTTTT 1
+TTCTAGCACAACACGCACACATATA 1
+TTCTTCCTTTTATCCTCTCTTAA 1
+TTCTTGATAACGCATCTTCTACAT 1
+TTCTTGGACTACACATTTTTTATTGTTTTA 1
+TTCTTTGACTACATATTTTTTATT 1
+TTCTTTTACTACATATTGTTTATTTTTTTA 1
+TTCTTTTACTACATATTTTTTATTTTT 1
+TTCTTTTACTACATATTTTTTATTTTTTTA 1
+TTGATTCTTCTTTTTCACAAAA 1
+TTGTAAAACATTCTTTCTCCTGAC 1
+TTTAAACACTTCCTACATCAAATTTC 1
+TTTAAGACTTATGAGCTTG 1
+TTTACCAGAGGAGTCGAGTTTTT 1
+TTTATTACAACCCTATCTTACCTCAA 1
+TTTATTACTTAGTCATAATTCCAA 1
+TTTCCGACAAATACACCATCTTC 1
+TTTGATACCTTTATACCATACCTATT 1
+TTTGTTTTTCATTTTTTTATCTTT 1
+TTTGTTTTTTACTATATTT 1
+TTTTATATTTCCTTATATCTTTACTA 1
+TTTTCATTTCTTCTTCAAATCCTTT 1
+TTTTCTTTACCCATCTTTACTTTCCC 1
+TTTTCTTTTTTTCATTTTCTCTTTTA 1
+TTTTTAACTCATTTTACAATTAAAC 1
+TTTTTAACTCCCATCATTTTTCCTC 1
+TTTTTATCAATTTTCACCATTC 1
+TTTTTTATCAATTTTCACCATTCAT 1
+TTTTTTATCATTTTTCACCTAAAAAA 1
+TTTTTTTTTTGTTTTTATTTTTATCAT 1
b
diff -r 000000000000 -r a8aacccd79a3 test-data/output.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.tab Mon Sep 04 07:13:28 2017 -0400
b
@@ -0,0 +1,247 @@
+ACAGCAGGACGGTGATCA 2
+TGCTTGGACTACATATGGTTGAGTG 2
+TGGAATGTAAAGAAGTATGGAG 2
+AATTTATTTAATTTATATTCTAACTAA 1
+TTATAATCACGGCACCCTATACA 1
+TCCGAAAAATCGTAGGACCCGGGCA 1
+TCCCAACCCTCGAGCATCATTTTC 1
+TTGATTCTTCTTTTTCACAAAA 1
+AGAACAATTAAATAAAATAGCATA 1
+CCAGAAAACAATACAACATCCTCA 1
+TCTAGTCTGAGCGTAGTACCAGATTG 1
+TTTTTAACTCCCATCATTTTTCCTC 1
+GCAGATAGAAATCAATACAAAAATC 1
+AAGTGAAGAAGTAGTTTTT 1
+AATGTCACTTGAAGAATTCACGT 1
+TTTACCAGAGGAGTCGAGTTTTT 1
+GGTATCTTTATATTTTAATTTTCTT 1
+TTACTAGATCCACCCTCATTA 1
+TATATAAATCTTCAACATCAA 1
+GAAACCATTATCTTATCTTTATACA 1
+GGAGATTGTAGAACGAAAGGAAAAT 1
+ACTAAACTTTTCTTACCATATTTCTA 1
+TCAAGCCTTTTGAAGAACTGACCTAAA 1
+TAACATAAATTTTAATCATAAATTG 1
+TAGCGAGATGGACCAACGTGCTGT 1
+TACAAACGTAATTTTCGCATAACATC 1
+CGGAAAAGAATGTAGACCATTTAA 1
+GAAAGGAAGGGAAGAAAGCGAAAGGA 1
+CAAGAATACAAAAAATACTAATTA 1
+CATACCTACAAAAAAGCTTCTCTTAC 1
+TAAAATAAATAAGTCCGACGACAA 1
+AGGAATATGATGAAATAAAAAAAT 1
+TTTTTTTTTTGTTTTTATTTTTATCAT 1
+TTTTTAACTCATTTTACAATTAAAC 1
+CCGCGATCTGCTTATTTATAATCTT 1
+TAGGTACTTACCTTTTTTTTACACAA 1
+TCATTACACTTCTTACAAAAC 1
+TATTATACATAGAATAACAAATCTTT 1
+ATGTTATTTACTTTTTCCCCTTATA 1
+TCCGAAAACAAGGCCCGTCGCT 1
+TGCTTTTACTACATATTTTTTATTTTTTTA 1
+GAACAATTTTTCAATTTTTTACATTA 1
+AACATTTTATCAATTATACATTA 1
+TATCTGATCAACAATCTTTTCCCAT 1
+AATTGCAACAGAGACTGGAA 1
+TATTCAATCACTCCATTATATATAACA 1
+CAAACGGAACAAGACATCACCATC 1
+TCTTCTATATAATCCTTTATTATAA 1
+TGATGACGGGCAGCAGGGATTTTC 1
+TTTATTACAACCCTATCTTACCTCAA 1
+AACAGGAAAAACAGAAGGATTTCTA 1
+CGATATTTTCTCCTCGTACC 1
+AAATTGCAAAGATGGAAAATAAAACT 1
+TACACACTCATCAACCAAAGGACG 1
+TATATTGCCTCCCCATAATCCTT 1
+TACAAACGGAACTTTCTTCATAACTTC 1
+GGACGGAGAACTGATAATGGC 1
+TTCTTTGACTACATATTTTTTATT 1
+TCTTTTTTTTAATACTTATTTTCATT 1
+TACTTTTTTCTTAATTTTTTATTAAAC 1
+AATATAAAAATACAATCAACCATTGCA 1
+CAGTTTCACAAAAGATCTTTTAA 1
+GAAACAAACAACACATACCCTCTGGC 1
+AATGACACACTCTTCATCAAC 1
+AAGATGGAGTAGTTTTTT 1
+ACAACCTCAACTCATATTT 1
+TTAAACAATTTGGAATTAATT 1
+TACAAAAAATGCGAAAATTGACCCT 1
+GGACGGAGAACTGATAAGGGCA 1
+TTAAGTTTTAGACATAATCTATTACAA 1
+TTATTATCTATTTTAATTTTTCTTAA 1
+CACCGAACCGGGAAGGCGAACAAC 1
+TCCACCTATTTATCTTTTCTT 1
+TCGCCGTAAAGCCAGTCGTTCTCC 1
+TCAAAGAACAATGTAAAGCCGCGAC 1
+TGGACAAGAACCACGCGACGGGTGT 1
+CCCGAAAAGCCGAGGACGACTTA 1
+TTTTTTATCATTTTTCACCTAAAAAA 1
+TAGAACTCGAACCAGAGCTCC 1
+TCTATATTATTTTTATCAATTTTCACC 1
+TCTATTTCTTTATTTTTTTTATTAT 1
+TTTGATACCTTTATACCATACCTATT 1
+ATAAAGCTAGATTACCAAAGCAT 1
+GCCAACGACCATACCACGA 1
+CGGCACATGTTGAATTACACTCA 1
+TACTATTTTATTATACATACATACATTA 1
+TTAATGACACACGGGAAAAACACCG 1
+TAGTTTCACTACTTTATTCTTTTTA 1
+AACAGGGAGATCAACAGCGTTGACA 1
+CGATATTTTCTCCTCTGACC 1
+ACCAGCACCTTCCGACTCAACGTCAAA 1
+AAGGAATTAAAGCAATAATTCTAA 1
+TACAAAACAAACAAATTACAATCTAAA 1
+CAATTTTTAATTCCTTTTTTCTTCTT 1
+TACAGACAACACATACGGACTTAA 1
+TCTGTATTTGACTTATTACTTTCTCC 1
+TGAGCTAGAACTGCACCCACTCCA 1
+CGCCGCAAGATGAATACTCTAATGA 1
+TATTTCTTTTTTAACTTCTTTTC 1
+TTACAATCTACTATTCTTTTATTA 1
+TTTAAACACTTCCTACATCAAATTTC 1
+TGTGTAATCTTTCTACTTCTTCTAC 1
+TCTATTCATACAAAACACTAATACCC 1
+TGGAGTAGCACAGTCGTCTGAAATC 1
+AAGCACGCCTTACCACAATTTATAA 1
+CTGGAAACTATTGATCAAATT 1
+TACACAGACTTACAAAACACATCCTTC 1
+TTCAAGTAGATTGCATTTTTTAATA 1
+TTATTACATCGTCCACATATAACAAAA 1
+CAAGGCTCAGAAGAACATCACCAAGACC 1
+TGAGGAAAACAGAAAAATGAGAGACA 1
+TCAAAAAGTAATAGGGATCGTTA 1
+TAACTTTAACTTTTTTACT 1
+TATTCCGACAATACCTTCTTTAC 1
+TTTGTTTTTTACTATATTT 1
+TTCATTTTATTTTTAAATATCTTTTTT 1
+TACTCAATAGAACTCTACTCACTCATA 1
+TGAAAGGAAAAACAGGACACGGGA 1
+AAAATCGACTGCCGAAAACATTTTAA 1
+TACAGAGAAATATACAACACTCACC 1
+TCAACTGGCAAGAATTTTTGAAAATT 1
+GAGAACTTTTAATCATTTTAC 1
+TATTATCATCTCGTTCTTCCTTCTC 1
+TTTTCATTTCTTCTTCAAATCCTTT 1
+TAGTCATACATACCTAATTATACATA 1
+ATTTTACTTCATCATTTTC 1
+TCTCTTTTATTTTTATCTTTCCTT 1
+GCCGGGGCGTGAGATGTCTGCATTA 1
+AGGATTTTTAAGCCCATATGTTTCC 1
+CAAGATATGAACAAAGCAAAGACAC 1
+CAACACATGACGCGACAATTCTTG 1
+CAAATAACAAACTGAATAAACGAAA 1
+TGAGAATGACTTCTTCACGATCTCTT 1
+TCTTATTATCATTTTTTTATCCCTT 1
+TCAAATGCAAATTGGATTTATGA 1
+CCTTACTCAACATACTTAATCATACTTA 1
+TAGACTTTCTACTCATTATTAC 1
+TGAAACTGAAACTAACATACAAAATATT 1
+AAAACCCGGACAAACCATCGGAGGA 1
+TACAGACAACACATACGGACTTAAGT 1
+TATTTCAGCAACAGACTAAGACTAA 1
+AACTTTAAATTTTTAATAACCTT 1
+TATTTATAAATTTTTTCTTGAGAC 1
+TTAATATGTAATTTCATACCTCAC 1
+CACAGACTGAGGCAGAAAAAACAA 1
+TAAAGAAGAAGAATTGATTTTAAT 1
+TACTGAAAACGGGCGCATATCAGTGG 1
+TCAGTCTTTTTTTCTCTCCTA 1
+TATAATTTTATTTTATATTTTCTCT 1
+NATTCTTACTCCATTTCAATTTACT 1
+TTGTAAAACATTCTTTCTCCTGAC 1
+TAATTACCATTGCTAACTATCCA 1
+TTCTTCCTTTTATCCTCTCTTAA 1
+TCTAAACACCCACGAAAATCTCTTAC 1
+AAAAACACACAGACACAAGCAGCAAT 1
+CGGACGGTATATTTTTTAATATAA 1
+TATGGAGAAACAGCGATATAAGTCA 1
+TACAACTAACATCCTTTCTTCTTCC 1
+AACTCTCTAATTTAACTTTGTGC 1
+TCCTGAGGACGAGGGGCGTTTAGC 1
+TATTTCCAACCTTCAACCTCAAATAA 1
+TGGACGGAGAACTGATAAGGGC 1
+TTTAAGACTTATGAGCTTG 1
+TTAAAGACGCAACAACTAACATT 1
+TAGGAACTTCATACCGGTCTC 1
+CGATATTTTCTCCTCTTACC 1
+GAGGATTAAAAGAACGGTTTATAA 1
+GAATGATCGCACCACCACCTCAACGTT 1
+TTTTCTTTACCCATCTTTACTTTCCC 1
+AAGACAACAATGACATATAAGACG 1
+TAATAATTTAAATAAATATAAATTT 1
+TACTGAAACAAGGAAACACAAGC 1
+TCAGAAGAACAGAGAATTGATTTT 1
+CATACCTTAAATTATCTCTTTCTT 1
+TTCTTTTACTACATATTTTTTATTTTT 1
+AAAAAATATCTTTTTTAACTCGTGGCC 1
+TAACAAATAGAACGTTCTAATTTAAA 1
+TAGTTACCTTCATATCTCTCTTTA 1
+TAAAATTGTAATATTTAAATAATAT 1
+AAAAGGAAAAACAGAAAAATTGGG 1
+AGATGTTGATCTAAACTCTCCCA 1
+TACCTCTTTATTAACCTCCACCTCTA 1
+TTTCCGACAAATACACCATCTTC 1
+ACAAATCATAAATTTTTTTTTACT 1
+GACGAAACGCAACAACAAAATGGACG 1
+TACAAATTTTTTTTTCTTTCTTAT 1
+TACACCTCTTTTTACTTTTTTATT 1
+TATGGATTATTTCAAAATTTTTTTTT 1
+TTCTAGCACAACACGCACACATATA 1
+TAACTACTTTTACATTAATACTAA 1
+TCTCATCTTACAATTTTTTAAAACTT 1
+TTCTTGGACTACACATTTTTTATTGTTTTA 1
+TACACACTCATCAACCAAAGTACGTA 1
+TACTATATACTTCTTCAAATCACA 1
+TCAGAGTTCTACAGGTCCTACGATT 1
+TGATTTACTTACATTCTTTTTTT 1
+CCATATATGACTGACTCATTTCAC 1
+GAAGAGGAGGAGGAGTTTGTAAG 1
+AAAGACAAAAGAAATACAGGCACT 1
+TACAAGACTAAAACAAACGTGAAGT 1
+TAACGGAGCACGAGAACGAAGTGG 1
+CTTCTTTTACTACATATTTTTTATTTTTTTA 1
+TAATAAGAAACTGTTCAAACAATCCAC 1
+TGAGCGGAGAACCAGAGTTGATGAGC 1
+TATTATTTTTTTATTCCATTCATAT 1
+TTTATTACTTAGTCATAATTCCAA 1
+TTTTATATTTCCTTATATCTTTACTA 1
+AACGGGGAATAAGGGTTCG 1
+AATCTACAATTTCCATTACGACTCC 1
+CCGACCGAGCAAATAAACACAGGAACG 1
+TCCACAACAACTCTATCTAAAGCATT 1
+TTCTTGATAACGCATCTTCTACAT 1
+TGCTTGGACTACATATGGTTGAGGG 1
+CAGATTCACTGATTTTCTTACGCC 1
+TTTGTTTTTCATTTTTTTATCTTT 1
+CTATATTTTCTCTCTTACC 1
+TAACCTTGCAGAACTATACGATTCAAA 1
+TAAGAAACTGAGCTAACGCAATGTACC 1
+TTCTTTTACTACATATTTTTTATTTTTTTA 1
+TATCTATCTTTGATCTTCTTTTCA 1
+TAATAAATTATTAAATAAAAAAAAAA 1
+TTTTTTATCAATTTTCACCATTCAT 1
+TATTTCACTTTATACTTCCTTAA 1
+TAGTTTTAAATATTTCTTTTTTTC 1
+TTCTTTTACTACATATTGTTTATTTTTTTA 1
+GAGAATAAATATTTCAATGGTCTATTG 1
+CGATATTTTCTCCTCTTACCT 1
+CACGACTTTATTCTTTTTATCTCA 1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA 1
+CATAATATAAACTTATCTT 1
+ATGAAATTCGAACAATACGTC 1
+AACAACTGCAAACATCTACCACA 1
+TAAAAATAATTGTCTTTAATTTCA 1
+CGCAACCAGCAGCAACTCCTAGCAT 1
+ATTATTAATAAATTATTATAA 1
+CATTAATTCATCCATTTAAACTAA 1
+TCTTATTTTAATCTTCCAATTTC 1
+CTAGACAAGATGCTATAAATTTTAAA 1
+TGACCAAAGACAAACAAACAATAAATA 1
+TTTTTATCAATTTTCACCATTC 1
+TAAGTTTTTAATCATTTTTTTT 1
+TAATCAAAAAACTCTTCATTTTTA 1
+TACAAACGGAACTTTCGTCATAA 1
+TTTTCTTTTTTTCATTTTCTCTTTTA 1
+TAGCCTTTACTAGGCTTTTTCTAA 1
+TTAGTATTAATCTTCACTTAA 1
+TAAAATAAACCAAAACCCAAAAAT 1
b
diff -r 000000000000 -r a8aacccd79a3 test-data/sort.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort.py Mon Sep 04 07:13:28 2017 -0400
[
@@ -0,0 +1,8 @@
+import sys
+
+F = open(sys.argv[1], 'r')
+lines = F.readlines()
+lines = [line[:-1] for line in lines]
+for line in sorted(lines):
+    print(line)
+F.close()