changeset 0:a8aacccd79a3 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
author artbio
date Mon, 04 Sep 2017 07:13:28 -0400
parents
children 9ce7ccd468aa
files sequence_format_converter.py sequence_format_converter.xml test-data/fastqTofasta.sorted.fa test-data/fastqTofastaw.sorted.faw test-data/fastqTotabular.sorted.tab test-data/input.fa test-data/input.fastqsanger test-data/input.sorted.fa test-data/output.faw test-data/output.sorted.faw test-data/output.sorted.tab test-data/output.tab test-data/sort.py
diffstat 13 files changed, 4532 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sequence_format_converter.py	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,208 @@
+#!/usr/bin/env python
+#
+import argparse
+import logging
+import sys
+from collections import defaultdict
+
+
+def Parser():
+    """Parse the command line of the converter.
+
+    Returns the argparse namespace holding the attributes ``input``
+    (path of the file to convert), ``output`` (path of the converted
+    file) and ``format`` (requested output format).
+
+    The output format is restricted to the values that the writer
+    understands; any other value makes argparse exit with a usage error
+    instead of silently producing an empty output file.
+    """
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument(
+        '--input', action="store", type=str,
+        help="input file, accepted formats: fastq, fasta, fastaw "
+             "(weighted fasta), tabular")
+    the_parser.add_argument(
+        '--output', action="store", type=str, help="output converted file")
+    the_parser.add_argument(
+        '--format', action="store", type=str,
+        choices=['fasta', 'fastaw', 'tabular'],
+        help="select output format (fasta, fastaw, tabular)")
+    args = the_parser.parse_args()
+    return args
+
+
+class Sequencing:
+    """Convert a sequence file between fasta, fastaw and tabular formats.
+
+    The input format (fasta, fastaw, tabular or fastq) is detected from
+    the first four lines of the input file.  Each distinct sequence is
+    accumulated with its read count in ``self.seqdic`` and written back,
+    most abundant first, in the requested output format.  The whole
+    conversion is performed by the constructor.
+    """
+
+    # Characters accepted in a sequence during format detection.
+    REFERENCE = frozenset('ATGCN')
+
+    def __init__(self, input, output, format):
+        """Detect the input format, load the sequences and write them.
+
+        input -- path of the file to convert
+        output -- path of the file to write (truncated when it exists)
+        format -- output format: 'fasta', 'fastaw' or 'tabular'
+        """
+        self.input = input
+        self.output = open(output, 'w')
+        try:
+            self.outputformat = format
+            self.inputformat = self.detectformat(self.input)
+            self.seqdic = defaultdict(int)
+            self.read(self.input, self.inputformat)
+            self.write(self.output, self.outputformat)
+        finally:
+            # write() closes the handle on success; this also covers the
+            # paths that leave through sys.exit() or an exception.
+            self.output.close()
+
+    def detectformat(self, input):
+        """Return the format of the file ``input`` from its first 4 lines.
+
+        Returns 'fasta', 'fastaw', 'fastq' or 'tabular'.  Leaves through
+        sys.exit() when the file has fewer than four lines, when a
+        sequence holds characters other than A, T, G, C or N, or when no
+        format matches.
+        """
+        block = []
+        with open(input, 'r') as handle:
+            for line in handle:
+                # rstrip keeps the last character of a final line that
+                # has no trailing newline (a [:-1] slice would drop it)
+                block.append(line.rstrip('\r\n'))
+                if len(block) == 4:
+                    break
+        if len(block) < 4:
+            # reading past the end of a file never raises, so the number
+            # of lines actually read has to be checked explicitly
+            logging.info("File hasn't at leat four lines !")
+            sys.exit("File hasn't at leat four lines !")
+        line1, line2, line3, line4 = block
+        if line1.startswith('>'):
+            # the position of the second header tells which of the
+            # four lines hold sequence
+            if line3.startswith('>'):
+                logging.info("'>' detected in lines 1 and 3")
+                sequence = line2 + line4
+            elif line4.startswith('>'):
+                logging.info("'>' detected in lines 1 and 4")
+                sequence = line2 + line3
+            else:
+                logging.info("'>' detected in lines 1")
+                sequence = line2 + line3 + line4
+            if not self.REFERENCE.issuperset(sequence.upper()):
+                logging.info("But other nucleotides that A, T, G, C or N")
+                sys.exit('input appears to be Fasta but with '
+                         'unexpected nucleotides')
+            # weighted fasta when every header read ends with _<integer>
+            try:
+                for line in block:
+                    if line.startswith('>'):
+                        int(line.split('_')[-1])
+            except ValueError:
+                return 'fasta'
+            return 'fastaw'
+        if line1.startswith('@') and line3.startswith('+'):
+            if not self.REFERENCE.issuperset(line2):
+                logging.info("Looks like fastq input but other nucleotides "
+                             "that A, T, G, C or N")
+                sys.exit("input appears to be Fastq "
+                         "but with unexpected nucleotides")
+            return 'fastq'
+        for line in block:
+            fields = line.split('\t')
+            valid = (len(fields) == 2 and
+                     self.REFERENCE.issuperset(fields[0]))
+            if valid:
+                try:
+                    int(fields[1])
+                except ValueError:
+                    valid = False
+            if not valid:
+                logging.info("No valid format detected")
+                sys.exit('No valid format detected')
+        return 'tabular'
+
+    def read(self, input, format):
+        """Load the sequences of the file ``input`` into ``self.seqdic``.
+
+        format -- 'fasta', 'fastaw', 'tabular' or 'fastq'; any other
+        value leaves through sys.exit().
+
+        A parsing error is logged with its traceback and reading stops
+        there; the sequences collected so far are kept.
+        """
+        readers = {
+            'fasta': self.readfasta,
+            'fastaw': self.readfastaw,
+            'tabular': self.readtabular,
+            'fastq': self.readfastq,
+        }
+        handle = open(input, 'r')
+        try:
+            reader = readers.get(format)
+            if reader is None:
+                logging.info("no valid format detected")
+                sys.exit('No valid format detected')
+            try:
+                reader(handle)
+            except Exception:
+                # Exception (not a bare except) lets SystemExit and
+                # KeyboardInterrupt propagate
+                logging.exception("an error occured while reading %s",
+                                  format)
+        finally:
+            handle.close()
+
+    def readfastaw(self, input):
+        """Add the records of an open weighted fasta file to the counts.
+
+        The read count is the integer after the last '_' of each header
+        and applies to the sequence lines that follow it.  Blank lines
+        are skipped.  Raises ValueError when a sequence precedes the
+        first header or when a header carries no integer count.
+        """
+        weight = None
+        for line in input:
+            line = line.rstrip('\r\n')
+            if line.startswith('>'):
+                weight = int(line.split('_')[-1])
+            elif line:
+                if weight is None:
+                    raise ValueError('sequence found before any header')
+                self.seqdic[line] += weight
+        input.close()
+
+    def readfasta(self, input):
+        """Add the records of an open fasta file to the counts.
+
+        Sequences may span several lines; each record counts for one
+        read.
+        """
+        chunks = None  # stays None until the first header is met
+        for line in input:
+            if line.startswith('>'):
+                if chunks is not None:
+                    # a new header closes the previous record
+                    self.seqdic["".join(chunks)] += 1
+                chunks = []
+            elif chunks is None:
+                # if file went through filter and contains only empty lines
+                logging.info("first line is empty.")
+            else:
+                chunks.append(line.rstrip('\r\n'))
+        if chunks is None:
+            logging.info("input file has not fasta sequences.")
+        else:
+            self.seqdic["".join(chunks)] += 1  # for the last sequence
+        input.close()
+
+    def readtabular(self, input):
+        """Add the rows of an open 'sequence<TAB>count' file to the counts.
+
+        Blank lines are skipped.
+        """
+        for line in input:
+            line = line.rstrip('\r\n')
+            if not line:
+                continue
+            fields = line.split('\t')
+            self.seqdic[fields[0]] += int(fields[1])
+        input.close()
+
+    def readfastq(self, input):
+        """Add the reads of an open fastq file to the counts.
+
+        Records are assumed to span exactly four lines; the sequence is
+        the second line of each record.
+        """
+        for index, line in enumerate(input):
+            if index % 4 == 1:
+                self.seqdic[line.rstrip('\r\n')] += 1
+        input.close()
+
+    def write(self, output, format='fasta'):
+        """Write the counted sequences to the open file ``output``.
+
+        Sequences are written by decreasing read count.  ``format`` is
+        'fasta' (one numbered record per read), 'fastaw' (one record per
+        distinct sequence, header '>rank_count') or 'tabular'
+        ('sequence<TAB>count').  Any other value leaves through
+        sys.exit() instead of silently producing an empty file.
+        ``output`` is closed in every case.
+        """
+        try:
+            ranked = sorted(self.seqdic, key=self.seqdic.get, reverse=True)
+            if format == 'fasta':
+                headercount = 0
+                for seq in ranked:
+                    for _ in range(self.seqdic[seq]):
+                        headercount += 1
+                        output.write('>%s\n%s\n' % (headercount, seq))
+            elif format == 'fastaw':
+                for headercount, seq in enumerate(ranked, 1):
+                    output.write('>%s_%s\n%s\n' % (headercount,
+                                                   self.seqdic[seq], seq))
+            elif format == 'tabular':
+                for seq in ranked:
+                    output.write('%s\t%s\n' % (seq, self.seqdic[seq]))
+            else:
+                logging.info("unknown output format: %s", format)
+                sys.exit('unknown output format: %s' % format)
+        finally:
+            output.close()
+
+
+def main(input, output, format):
+    """Run one conversion of the file ``input`` into the file ``output``.
+
+    The conversion is carried out while the Sequencing object is being
+    built, so the instance is not kept.
+    """
+    Sequencing(input=input, output=output, format=format)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, send INFO-level log
+    # records to stdout, then run the conversion.
+    options = Parser()
+    log = logging.getLogger(__name__)
+    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
+    main(options.input, options.output, options.format)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sequence_format_converter.xml	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,169 @@
+<tool id="sequence_format_converter" name="sequence_format_converter" version="2.0.0">
+  <description></description>
+  <command><![CDATA[
+      python '$__tool_directory__'/sequence_format_converter.py
+      --input '$input'
+      --output '$output'
+      --format '$output_format'
+  ]]></command>
+
+<inputs>
+    <param name="input" type="data" format="fasta, fastq, tabular" label="file to convert to tabular (input format is automatically detected)"/>
+    <param name="output_format" type="select" label="conversion options">
+        <option value="tabular" selected="true">tabular</option>
+        <option value="fasta">Fasta</option>
+        <option value="fastaw">Weighted fasta</option>
+    </param>
+   </inputs>
+
+ <outputs>
+   <data format="fasta" name="output" label="${output_format} conversion of ${input.name}">
+     <change_format>
+        <when input="output_format" value="tabular" format="tabular"/>
+     </change_format>
+   </data>
+</outputs>
+
+    <tests>
+        <test>
+            <!-- conversion fasta to tabular -->
+            <param name="output_format" value="tabular" />
+            <param ftype="fasta" name="input" value="input.fa" />
+            <output file="output.tab" name="output" />
+        </test>
+        <test>
+            <!-- conversion tabular to fasta -->
+            <param name="output_format" value="fasta" />
+            <param ftype="tabular" name="input" value="output.tab" />
+            <output file="input.sorted.fa" name="output" sort="True"/>
+        </test>
+        <test>
+            <!-- conversion fastaw to tabular -->
+            <param name="output_format" value="tabular" />
+            <param ftype="fasta" name="input" value="output.faw" />
+            <output file="output.sorted.tab" name="output" sort="True"/>
+        </test>
+        <test>
+            <!-- conversion tabular to fastaw -->
+            <param name="output_format" value="fastaw" />
+            <param ftype="tabular" name="input" value="output.tab" />
+            <output file="output.sorted.faw" name="output" sort="True" />
+        </test>
+         <test>
+            <!-- conversion fasta to fastaw -->
+            <param name="output_format" value="fastaw" />
+            <param ftype="fasta" name="input" value="input.fa" />
+            <output file="output.sorted.faw" name="output" sort="True" />
+        </test>
+        <test>
+            <!-- conversion fastaw to fasta -->
+            <param name="output_format" value="fasta" />
+            <param ftype="fasta" name="input" value="output.faw" />
+            <output file="input.sorted.fa" name="output" sort="True" />
+        </test>
+        <test>
+            <!-- conversion fastq to tabular -->
+            <param name="output_format" value="tabular" />
+            <param ftype="fastq" name="input" value="input.fastqsanger" />
+            <output file="fastqTotabular.sorted.tab" name="output" sort="True" />
+        </test>
+         <test>
+            <!-- conversion fastq to fasta -->
+            <param name="output_format" value="fasta" />
+            <param ftype="fasta" name="input" value="input.fastqsanger" />
+            <output file="fastqTofasta.sorted.fa" name="output" sort="True" />
+        </test>
+        <test>
+            <!-- conversion fastq to fastaw -->
+            <param name="output_format" value="fastaw" />
+            <param ftype="fasta" name="input" value="input.fastqsanger" />
+            <output file="fastqTofastaw.sorted.faw" name="output" sort="True" />
+        </test>
+   </tests>
+
+
+<help>
+
+**What it does**
+
+The tool performs all pairwise conversions between sequence formats fasta, fastaw and tabular.
+
+The tool can also convert fastq input into any of the formats fasta, fastaw and tabular.
+
+The format of the input is automatically detected by the tool.
+
+**Formats**
+
+*Fasta*
+
+>id1
+
+ATGCATGACCAGATAGGAC
+
+>id2
+
+ATGCATGACCAGATAGGAC
+
+Note that the tool handles fasta sequences over multiple lines
+
+
+----------
+
+*Fastaw*
+
+Reduces the size of a fasta file of sequence reads by storing each distinct sequence once, together with its read count:
+
+>id1_n1
+
+ATGCATGACCAGATAGGAC
+
+>id2_n2
+
+ATGCATGACCAGATAGGAC
+
+etc...
+
+Here n1 and n2 are integers that indicate the number of reads of the sequence found in the sequencing dataset
+
+Note that if 2 fastaw files are merged (e.g. by concatenation), the read counts are no longer correct, because a sequence present in both files appears twice with partial counts.
+
+These counts can simply be re-computed by submitting the merged file to a fastaw conversion with the *sequence_format_converter* tool.
+
+
+----------
+
+*Tabular*
+
+A tabular version of fastaw without fasta headers:
+
+column 1               column2
+
+ATGCATGACCAGATAGGAC    n1
+
+ATGCATGACCAGATAGGAC    n2
+
+
+----------
+
+*Fastq*
+
+@HWI-1
+
+ATGCATGACCAGATAGGAC
+
+\+
+
+BBBA;ACB9ABCBABB@@/
+
+@HWI-2
+
+ATGCATGACCAGATAGGAC
+
+\+
+
+?03@?82?B>C@B>@CC?0
+
+
+</help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTofasta.sorted.fa	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,500 @@
+>1
+>10
+>100
+>101
+>102
+>103
+>104
+>105
+>106
+>107
+>108
+>109
+>11
+>110
+>111
+>112
+>113
+>114
+>115
+>116
+>117
+>118
+>119
+>12
+>120
+>121
+>122
+>123
+>124
+>125
+>126
+>127
+>128
+>129
+>13
+>130
+>131
+>132
+>133
+>134
+>135
+>136
+>137
+>138
+>139
+>14
+>140
+>141
+>142
+>143
+>144
+>145
+>146
+>147
+>148
+>149
+>15
+>150
+>151
+>152
+>153
+>154
+>155
+>156
+>157
+>158
+>159
+>16
+>160
+>161
+>162
+>163
+>164
+>165
+>166
+>167
+>168
+>169
+>17
+>170
+>171
+>172
+>173
+>174
+>175
+>176
+>177
+>178
+>179
+>18
+>180
+>181
+>182
+>183
+>184
+>185
+>186
+>187
+>188
+>189
+>19
+>190
+>191
+>192
+>193
+>194
+>195
+>196
+>197
+>198
+>199
+>2
+>20
+>200
+>201
+>202
+>203
+>204
+>205
+>206
+>207
+>208
+>209
+>21
+>210
+>211
+>212
+>213
+>214
+>215
+>216
+>217
+>218
+>219
+>22
+>220
+>221
+>222
+>223
+>224
+>225
+>226
+>227
+>228
+>229
+>23
+>230
+>231
+>232
+>233
+>234
+>235
+>236
+>237
+>238
+>239
+>24
+>240
+>241
+>242
+>243
+>244
+>245
+>246
+>247
+>248
+>249
+>25
+>250
+>26
+>27
+>28
+>29
+>3
+>30
+>31
+>32
+>33
+>34
+>35
+>36
+>37
+>38
+>39
+>4
+>40
+>41
+>42
+>43
+>44
+>45
+>46
+>47
+>48
+>49
+>5
+>50
+>51
+>52
+>53
+>54
+>55
+>56
+>57
+>58
+>59
+>6
+>60
+>61
+>62
+>63
+>64
+>65
+>66
+>67
+>68
+>69
+>7
+>70
+>71
+>72
+>73
+>74
+>75
+>76
+>77
+>78
+>79
+>8
+>80
+>81
+>82
+>83
+>84
+>85
+>86
+>87
+>88
+>89
+>9
+>90
+>91
+>92
+>93
+>94
+>95
+>96
+>97
+>98
+>99
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGGT
+AATGGCACTGGAAGAATTCACGTG
+AATTGCACTAGTCCCGGCCTG
+ACTGAATTCTCGTGGGTCTGCAT
+AGGACGGGAAGGTGTCAACG
+ATAAAGCTAGATTACCAAAGCAT
+CAAATTCGGTTCTAGAGAGGTT
+CGAATAGCGTTGTGACTGA
+CGGACGGTATATGGGTTAATATT
+CGGATGATGGTTCACAACGACC
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCAA
+CGGCACATGTTGAAGTACACTCAA
+CTGACTAGATCCACACTCATTA
+GGACGGAGAACTGATAAGGGCTCGG
+GGCGAACATGGATCTAGTGCACG
+GGGAGCGAGACGGGGACTCAC
+GGGAGCGAGACGGGGACTCACT
+GGGAGCGAGACGGGGACTCACT
+TAAAGCTAGATTACCAAAGCAT
+TAAAGCTAGATTACCAAAGCAT
+TAAAGCTAGATTACCAAAGCAT
+TAAGGAAATAGTAGCCGTGAT
+TAAGGAAATAGTAGCCGTGAT
+TAAGGAAATAGTAGCCGTGAT
+TAGCACCACATGATTCGGCT
+TAGGAACTTCATACCGTGCTCT
+TAGGAACTTCATACCGTGCTCT
+TATCACAGCCAGCTTTGAGGAG
+TATCACAGCCATTTTGACGAGTT
+TATCACAGCCATTTTGACGAGTT
+TATTGCACTTGAGACGGCCTTA
+TCAGGTACCTGAAGTAGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCGTTAT
+TCTTTGGTATTCTAGCTGTAGA
+TGACTAGATCCACACTCATTA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAC
+TGACTAGATTCACACTCATTA
+TGGAATGTAAAGAAGAATGGAG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGT
+TGGAATGTAAAGAATTATGGAG
+TGGAATGTAAAGGAGTATGGAG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAAA
+TGGACGGAGAACTGATAAGGGCAT
+TGGACGGAGAACTGATAAGGGCT
+TGGACGGAGAACTGATAAGGGCTT
+TGGACGGAGAACTGATAAGGGT
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTofastaw.sorted.faw	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,110 @@
+>10_3
+>11_3
+>12_2
+>13_2
+>14_2
+>15_2
+>16_2
+>17_2
+>18_2
+>19_2
+>1_86
+>20_2
+>21_2
+>22_2
+>23_1
+>24_1
+>25_1
+>26_1
+>27_1
+>28_1
+>29_1
+>2_43
+>30_1
+>31_1
+>32_1
+>33_1
+>34_1
+>35_1
+>36_1
+>37_1
+>38_1
+>39_1
+>3_16
+>40_1
+>41_1
+>42_1
+>43_1
+>44_1
+>45_1
+>46_1
+>47_1
+>48_1
+>49_1
+>4_10
+>50_1
+>51_1
+>52_1
+>53_1
+>54_1
+>55_1
+>5_9
+>6_8
+>7_7
+>8_6
+>9_4
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGGT
+AATGGCACTGGAAGAATTCACGTG
+AATTGCACTAGTCCCGGCCTG
+ACTGAATTCTCGTGGGTCTGCAT
+AGGACGGGAAGGTGTCAACG
+ATAAAGCTAGATTACCAAAGCAT
+CAAATTCGGTTCTAGAGAGGTT
+CGAATAGCGTTGTGACTGA
+CGGACGGTATATGGGTTAATATT
+CGGATGATGGTTCACAACGACC
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCAA
+CTGACTAGATCCACACTCATTA
+GGACGGAGAACTGATAAGGGCTCGG
+GGCGAACATGGATCTAGTGCACG
+GGGAGCGAGACGGGGACTCAC
+GGGAGCGAGACGGGGACTCACT
+TAAAGCTAGATTACCAAAGCAT
+TAAGGAAATAGTAGCCGTGAT
+TAGCACCACATGATTCGGCT
+TAGGAACTTCATACCGTGCTCT
+TATCACAGCCAGCTTTGAGGAG
+TATCACAGCCATTTTGACGAGTT
+TATTGCACTTGAGACGGCCTTA
+TCAGGTACCTGAAGTAGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCGTTAT
+TCTTTGGTATTCTAGCTGTAGA
+TGACTAGATCCACACTCATTA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAC
+TGACTAGATTCACACTCATTA
+TGGAATGTAAAGAAGAATGGAG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGT
+TGGAATGTAAAGAATTATGGAG
+TGGAATGTAAAGGAGTATGGAG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAAA
+TGGACGGAGAACTGATAAGGGCAT
+TGGACGGAGAACTGATAAGGGCT
+TGGACGGAGAACTGATAAGGGCTT
+TGGACGGAGAACTGATAAGGGT
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTotabular.sorted.tab	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,55 @@
+AATGGCACTGGAAGAATTCACGG	2
+AATGGCACTGGAAGAATTCACGGG	4
+AATGGCACTGGAAGAATTCACGGGT	1
+AATGGCACTGGAAGAATTCACGTG	1
+AATTGCACTAGTCCCGGCCTG	1
+ACTGAATTCTCGTGGGTCTGCAT	1
+AGGACGGGAAGGTGTCAACG	1
+ATAAAGCTAGATTACCAAAGCAT	1
+CAAATTCGGTTCTAGAGAGGTT	1
+CGAATAGCGTTGTGACTGA	1
+CGGACGGTATATGGGTTAATATT	1
+CGGATGATGGTTCACAACGACC	1
+CGGCACATGTTGAAGTACACTCA	2
+CGGCACATGTTGAAGTACACTCAA	2
+CTGACTAGATCCACACTCATTA	1
+GGACGGAGAACTGATAAGGGCTCGG	1
+GGCGAACATGGATCTAGTGCACG	1
+GGGAGCGAGACGGGGACTCAC	1
+GGGAGCGAGACGGGGACTCACT	2
+TAAAGCTAGATTACCAAAGCAT	3
+TAAGGAAATAGTAGCCGTGAT	3
+TAGCACCACATGATTCGGCT	1
+TAGGAACTTCATACCGTGCTCT	2
+TATCACAGCCAGCTTTGAGGAG	1
+TATCACAGCCATTTTGACGAGTT	2
+TATTGCACTTGAGACGGCCTTA	1
+TCAGGTACCTGAAGTAGCG	1
+TCAGGTACCTGAAGTAGCGCGCG	10
+TCAGGTACCTGAAGTAGCGCGCGTTAT	1
+TCTTTGGTATTCTAGCTGTAGA	1
+TGACTAGATCCACACTCATTA	1
+TGACTAGATCCACACTCATTAA	2
+TGACTAGATCCACACTCATTAC	1
+TGACTAGATTCACACTCATTA	1
+TGGAATGTAAAGAAGAATGGAG	1
+TGGAATGTAAAGAAGTATGG	2
+TGGAATGTAAAGAAGTATGGA	7
+TGGAATGTAAAGAAGTATGGAG	43
+TGGAATGTAAAGAAGTATGGAGA	2
+TGGAATGTAAAGAAGTATGGAGT	1
+TGGAATGTAAAGAATTATGGAG	1
+TGGAATGTAAAGGAGTATGGAG	1
+TGGACGGAGAACTGATAAGG	2
+TGGACGGAGAACTGATAAGGG	6
+TGGACGGAGAACTGATAAGGGC	86
+TGGACGGAGAACTGATAAGGGCA	8
+TGGACGGAGAACTGATAAGGGCAA	9
+TGGACGGAGAACTGATAAGGGCAAA	1
+TGGACGGAGAACTGATAAGGGCAT	1
+TGGACGGAGAACTGATAAGGGCT	1
+TGGACGGAGAACTGATAAGGGCTT	1
+TGGACGGAGAACTGATAAGGGT	1
+TGTGATGTGACGTAGTGGAA	16
+TGTGATGTGACGTAGTGGAAA	2
+TGTGATGTGACGTAGTGGAAC	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,500 @@
+>1
+TAGTTACCTTCATATCTCTCTTTA
+>2
+TCTATTCATACAAAACACTAATACCC
+>3
+ACAACCTCAACTCATATTT
+>4
+TATAATTTTATTTTATATTTTCTCT
+>5
+TCTTCTATATAATCCTTTATTATAA
+>6
+TAAAATAAACCAAAACCCAAAAAT
+>7
+AATCTACAATTTCCATTACGACTCC
+>8
+TTTCCGACAAATACACCATCTTC
+>9
+CAGATTCACTGATTTTCTTACGCC
+>10
+CAAGAATACAAAAAATACTAATTA
+>11
+AACTCTCTAATTTAACTTTGTGC
+>12
+AAAAACACACAGACACAAGCAGCAAT
+>13
+TTACTAGATCCACCCTCATTA
+>14
+ATGTTATTTACTTTTTCCCCTTATA
+>15
+CGATATTTTCTCCTCTTACC
+>16
+TACAGAGAAATATACAACACTCACC
+>17
+ATGAAATTCGAACAATACGTC
+>18
+GAGAATAAATATTTCAATGGTCTATTG
+>19
+TGCTTTTACTACATATTTTTTATTTTTTTA
+>20
+CATACCTTAAATTATCTCTTTCTT
+>21
+TTTGTTTTTCATTTTTTTATCTTT
+>22
+TTATTATCTATTTTAATTTTTCTTAA
+>23
+TATTATCATCTCGTTCTTCCTTCTC
+>24
+TCAACTGGCAAGAATTTTTGAAAATT
+>25
+TACAAATTTTTTTTTCTTTCTTAT
+>26
+TTTTCTTTTTTTCATTTTCTCTTTTA
+>27
+AAGATGGAGTAGTTTTTT
+>28
+TCTCATCTTACAATTTTTTAAAACTT
+>29
+CATACCTACAAAAAAGCTTCTCTTAC
+>30
+TTTTATATTTCCTTATATCTTTACTA
+>31
+GGAGATTGTAGAACGAAAGGAAAAT
+>32
+TCTATTTCTTTATTTTTTTTATTAT
+>33
+CGGACGGTATATTTTTTAATATAA
+>34
+TTCTTGGACTACACATTTTTTATTGTTTTA
+>35
+TACTATATACTTCTTCAAATCACA
+>36
+ATTTTACTTCATCATTTTC
+>37
+TATTTCCAACCTTCAACCTCAAATAA
+>38
+CACGACTTTATTCTTTTTATCTCA
+>39
+TTCTTTTACTACATATTTTTTATTTTTTTA
+>40
+TTTATTACAACCCTATCTTACCTCAA
+>41
+CGATATTTTCTCCTCGTACC
+>42
+TAATTACCATTGCTAACTATCCA
+>43
+CATTAATTCATCCATTTAAACTAA
+>44
+GAAACAAACAACACATACCCTCTGGC
+>45
+TACTTTTTTCTTAATTTTTTATTAAAC
+>46
+TAACTTTAACTTTTTTACT
+>47
+TTCTTTTACTACATATTGTTTATTTTTTTA
+>48
+TCTAGTCTGAGCGTAGTACCAGATTG
+>49
+TTTTTTATCAATTTTCACCATTCAT
+>50
+AATGACACACTCTTCATCAAC
+>51
+TAACATAAATTTTAATCATAAATTG
+>52
+TCTATATTATTTTTATCAATTTTCACC
+>53
+TCCCAACCCTCGAGCATCATTTTC
+>54
+TAGTCATACATACCTAATTATACATA
+>55
+TACAAAAAATGCGAAAATTGACCCT
+>56
+GAGAACTTTTAATCATTTTAC
+>57
+TCTTATTTTAATCTTCCAATTTC
+>58
+CGGCACATGTTGAATTACACTCA
+>59
+CAGTTTCACAAAAGATCTTTTAA
+>60
+GCCAACGACCATACCACGA
+>61
+CAAATAACAAACTGAATAAACGAAA
+>62
+TAGTTTCACTACTTTATTCTTTTTA
+>63
+TGAGGAAAACAGAAAAATGAGAGACA
+>64
+TATATAAATCTTCAACATCAA
+>65
+TGATTTACTTACATTCTTTTTTT
+>66
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+>67
+TACTGAAAACGGGCGCATATCAGTGG
+>68
+TATTCAATCACTCCATTATATATAACA
+>69
+TATATTGCCTCCCCATAATCCTT
+>70
+TCGCCGTAAAGCCAGTCGTTCTCC
+>71
+TTTAAACACTTCCTACATCAAATTTC
+>72
+TTTGTTTTTTACTATATTT
+>73
+TCTTTTTTTTAATACTTATTTTCATT
+>74
+TAAGTTTTTAATCATTTTTTTT
+>75
+TGAGAATGACTTCTTCACGATCTCTT
+>76
+AATTTATTTAATTTATATTCTAACTAA
+>77
+AAAAAATATCTTTTTTAACTCGTGGCC
+>78
+AAGACAACAATGACATATAAGACG
+>79
+TGAGCTAGAACTGCACCCACTCCA
+>80
+ACTAAACTTTTCTTACCATATTTCTA
+>81
+TATTTCACTTTATACTTCCTTAA
+>82
+TATCTATCTTTGATCTTCTTTTCA
+>83
+TTTTTATCAATTTTCACCATTC
+>84
+TAAAAATAATTGTCTTTAATTTCA
+>85
+TTAAAGACGCAACAACTAACATT
+>86
+TAACCTTGCAGAACTATACGATTCAAA
+>87
+TACTATTTTATTATACATACATACATTA
+>88
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+>89
+AATGTCACTTGAAGAATTCACGT
+>90
+TAATAAGAAACTGTTCAAACAATCCAC
+>91
+AAGCACGCCTTACCACAATTTATAA
+>92
+CCATATATGACTGACTCATTTCAC
+>93
+TTAATATGTAATTTCATACCTCAC
+>94
+AGGATTTTTAAGCCCATATGTTTCC
+>95
+ACAGCAGGACGGTGATCA
+>96
+TGATGACGGGCAGCAGGGATTTTC
+>97
+TTGTAAAACATTCTTTCTCCTGAC
+>98
+TTCTTCCTTTTATCCTCTCTTAA
+>99
+ATTATTAATAAATTATTATAA
+>100
+CTGGAAACTATTGATCAAATT
+>101
+TACAACTAACATCCTTTCTTCTTCC
+>102
+TCAAATGCAAATTGGATTTATGA
+>103
+TCCTGAGGACGAGGGGCGTTTAGC
+>104
+TACACAGACTTACAAAACACATCCTTC
+>105
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+>106
+TTAAACAATTTGGAATTAATT
+>107
+TAGCCTTTACTAGGCTTTTTCTAA
+>108
+CGATATTTTCTCCTCTTACCT
+>109
+TGGAATGTAAAGAAGTATGGAG
+>110
+TTTTTAACTCCCATCATTTTTCCTC
+>111
+TTTTTTATCATTTTTCACCTAAAAAA
+>112
+GAACAATTTTTCAATTTTTTACATTA
+>113
+TATGGATTATTTCAAAATTTTTTTTT
+>114
+TAACGGAGCACGAGAACGAAGTGG
+>115
+CAATTTTTAATTCCTTTTTTCTTCTT
+>116
+AGATGTTGATCTAAACTCTCCCA
+>117
+TGAGCGGAGAACCAGAGTTGATGAGC
+>118
+TAACAAATAGAACGTTCTAATTTAAA
+>119
+CTAGACAAGATGCTATAAATTTTAAA
+>120
+TTTTCTTTACCCATCTTTACTTTCCC
+>121
+TACACACTCATCAACCAAAGGACG
+>122
+TCTTATTATCATTTTTTTATCCCTT
+>123
+TCAGAGTTCTACAGGTCCTACGATT
+>124
+TTTATTACTTAGTCATAATTCCAA
+>125
+GCCGGGGCGTGAGATGTCTGCATTA
+>126
+GACGAAACGCAACAACAAAATGGACG
+>127
+TAGACTTTCTACTCATTATTAC
+>128
+AAATTGCAAAGATGGAAAATAAAACT
+>129
+CCTTACTCAACATACTTAATCATACTTA
+>130
+TGCTTGGACTACATATGGTTGAGTG
+>131
+GAATGATCGCACCACCACCTCAACGTT
+>132
+TCCACCTATTTATCTTTTCTT
+>133
+TGGACAAGAACCACGCGACGGGTGT
+>134
+CAAGATATGAACAAAGCAAAGACAC
+>135
+CAAACGGAACAAGACATCACCATC
+>136
+NATTCTTACTCCATTTCAATTTACT
+>137
+TAGAACTCGAACCAGAGCTCC
+>138
+CGGAAAAGAATGTAGACCATTTAA
+>139
+TACAAACGGAACTTTCGTCATAA
+>140
+GGTATCTTTATATTTTAATTTTCTT
+>141
+TATTCCGACAATACCTTCTTTAC
+>142
+AACTTTAAATTTTTAATAACCTT
+>143
+CATAATATAAACTTATCTT
+>144
+TATTTATAAATTTTTTCTTGAGAC
+>145
+TTTTTTTTTTGTTTTTATTTTTATCAT
+>146
+TATTATACATAGAATAACAAATCTTT
+>147
+TGGAGTAGCACAGTCGTCTGAAATC
+>148
+TATTTCTTTTTTAACTTCTTTTC
+>149
+TTATAATCACGGCACCCTATACA
+>150
+TTCTTTTACTACATATTTTTTATTTTT
+>151
+TAGCGAGATGGACCAACGTGCTGT
+>152
+CCAGAAAACAATACAACATCCTCA
+>153
+TCCGAAAACAAGGCCCGTCGCT
+>154
+TACTCAATAGAACTCTACTCACTCATA
+>155
+AACGGGGAATAAGGGTTCG
+>156
+TCAGTCTTTTTTTCTCTCCTA
+>157
+AATATAAAAATACAATCAACCATTGCA
+>158
+GGACGGAGAACTGATAAGGGCA
+>159
+TAAAGAAGAAGAATTGATTTTAAT
+>160
+TCATTACACTTCTTACAAAAC
+>161
+CCGCGATCTGCTTATTTATAATCTT
+>162
+TCTAAACACCCACGAAAATCTCTTAC
+>163
+AACAGGAAAAACAGAAGGATTTCTA
+>164
+TCTCTTTTATTTTTATCTTTCCTT
+>165
+AACATTTTATCAATTATACATTA
+>166
+GCAGATAGAAATCAATACAAAAATC
+>167
+TTAATGACACACGGGAAAAACACCG
+>168
+TACAGACAACACATACGGACTTAAGT
+>169
+TCCACAACAACTCTATCTAAAGCATT
+>170
+ATAAAGCTAGATTACCAAAGCAT
+>171
+TACCTCTTTATTAACCTCCACCTCTA
+>172
+TACACCTCTTTTTACTTTTTTATT
+>173
+CACCGAACCGGGAAGGCGAACAAC
+>174
+TAGGTACTTACCTTTTTTTTACACAA
+>175
+AGGAATATGATGAAATAAAAAAAT
+>176
+TATTATTTTTTTATTCCATTCATAT
+>177
+TAAAATAAATAAGTCCGACGACAA
+>178
+TCTGTATTTGACTTATTACTTTCTCC
+>179
+AAGGAATTAAAGCAATAATTCTAA
+>180
+TTCATTTTATTTTTAAATATCTTTTTT
+>181
+TTAGTATTAATCTTCACTTAA
+>182
+TATGGAGAAACAGCGATATAAGTCA
+>183
+CCCGAAAAGCCGAGGACGACTTA
+>184
+CACAGACTGAGGCAGAAAAAACAA
+>185
+TCAAGCCTTTTGAAGAACTGACCTAAA
+>186
+TAAGAAACTGAGCTAACGCAATGTACC
+>187
+CGATATTTTCTCCTCTGACC
+>188
+TAACTACTTTTACATTAATACTAA
+>189
+ACCAGCACCTTCCGACTCAACGTCAAA
+>190
+TCAAAGAACAATGTAAAGCCGCGAC
+>191
+TTGATTCTTCTTTTTCACAAAA
+>192
+TACAAAACAAACAAATTACAATCTAAA
+>193
+GAAACCATTATCTTATCTTTATACA
+>194
+CTATATTTTCTCTCTTACC
+>195
+TCAGAAGAACAGAGAATTGATTTT
+>196
+TAATAAATTATTAAATAAAAAAAAAA
+>197
+CCGACCGAGCAAATAAACACAGGAACG
+>198
+AAGTGAAGAAGTAGTTTTT
+>199
+TGGACGGAGAACTGATAAGGGC
+>200
+TGCTTGGACTACATATGGTTGAGGG
+>201
+TACAAGACTAAAACAAACGTGAAGT
+>202
+TGAAACTGAAACTAACATACAAAATATT
+>203
+TATCTGATCAACAATCTTTTCCCAT
+>204
+TTTAAGACTTATGAGCTTG
+>205
+CAAGGCTCAGAAGAACATCACCAAGACC
+>206
+TTCAAGTAGATTGCATTTTTTAATA
+>207
+CGCAACCAGCAGCAACTCCTAGCAT
+>208
+TACAAACGGAACTTTCTTCATAACTTC
+>209
+ACAAATCATAAATTTTTTTTTACT
+>210
+TCCGAAAAATCGTAGGACCCGGGCA
+>211
+CGCCGCAAGATGAATACTCTAATGA
+>212
+TGACCAAAGACAAACAAACAATAAATA
+>213
+AAAAGGAAAAACAGAAAAATTGGG
+>214
+TTCTTGATAACGCATCTTCTACAT
+>215
+TACTGAAACAAGGAAACACAAGC
+>216
+TCAAAAAGTAATAGGGATCGTTA
+>217
+TGGAATGTAAAGAAGTATGGAG
+>218
+TAAAATTGTAATATTTAAATAATAT
+>219
+GAGGATTAAAAGAACGGTTTATAA
+>220
+TTCTTTGACTACATATTTTTTATT
+>221
+TACAAACGTAATTTTCGCATAACATC
+>222
+AGAACAATTAAATAAAATAGCATA
+>223
+TAATAATTTAAATAAATATAAATTT
+>224
+AATTGCAACAGAGACTGGAA
+>225
+TTAAGTTTTAGACATAATCTATTACAA
+>226
+TGAAAGGAAAAACAGGACACGGGA
+>227
+AACAGGGAGATCAACAGCGTTGACA
+>228
+GGACGGAGAACTGATAATGGC
+>229
+TGTGTAATCTTTCTACTTCTTCTAC
+>230
+TTTACCAGAGGAGTCGAGTTTTT
+>231
+AAAATCGACTGCCGAAAACATTTTAA
+>232
+TGCTTGGACTACATATGGTTGAGTG
+>233
+TACAGACAACACATACGGACTTAA
+>234
+GAAGAGGAGGAGGAGTTTGTAAG
+>235
+TTACAATCTACTATTCTTTTATTA
+>236
+TTATTACATCGTCCACATATAACAAAA
+>237
+TTTTTAACTCATTTTACAATTAAAC
+>238
+AAAACCCGGACAAACCATCGGAGGA
+>239
+CAACACATGACGCGACAATTCTTG
+>240
+TACACACTCATCAACCAAAGTACGTA
+>241
+TTTTCATTTCTTCTTCAAATCCTTT
+>242
+AACAACTGCAAACATCTACCACA
+>243
+TAGTTTTAAATATTTCTTTTTTTC
+>244
+AAAGACAAAAGAAATACAGGCACT
+>245
+TTTGATACCTTTATACCATACCTATT
+>246
+ACAGCAGGACGGTGATCA
+>247
+TTCTAGCACAACACGCACACATATA
+>248
+TAATCAAAAAACTCTTCATTTTTA
+>249
+TAGGAACTTCATACCGGTCTC
+>250
+TATTTCAGCAACAGACTAAGACTAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fastqsanger	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,1000 @@
+@HWI-49
+TAAAGCTAGATTACCAAAGCAT
++
+BBBA;ACB9ABCBABB@@/?BB
+@HWI-119
+TGGACGGAGAACTGATAAGGGC
++
+?03@?82?B>C@B>@CC?032<
+@HWI-127
+TGGACGGAGAACTGATAAGGGC
++
+A>9@>;9>:=@3A7@BB#####
+@HWI-135
+TGGACGGAGAACTGATAAGGGC
++
+BB6BA5AC+BBBCC6@B>C<'<
+@HWI-136
+TGGACGGAGAACTGATAAGGGC
++
+B22<A?85=CBCB?;BB===88
+@HWI-161
+TGGACGGAGAACTGATAAGGGC
++
+B@>>A=?;7:;BB>=C@>)1(:
+@HWI-166
+CGGCACATGTTGAAGTACACTCAA
++
+B<2>BCBCACC@C@3@BBCBCCCB
+@HWI-187
+TGGACGGAGAACTGATAAGGGC
++
+B;>@B82:4;B?B?@BB=5:2<
+@HWI-199
+GGGAGCGAGACGGGGACTCAC
++
+@?@@8?6;@AB=72;;=<AB>
+@HWI-36
+ATAAAGCTAGATTACCAAAGCAT
++
+BBBB?AAAA9AABA>7@@:-9AB
+@HWI-39
+AATGGCACTGGAAGAATTCACGG
++
+54=4440)@0365:83:<;5###
+@HWI-227
+TGGAATGTAAAGAAGTATGGAG
++
+BB>ACCBCCC@9@A4>CA66@>
+@HWI-249
+TGGAATGTAAAGAAGTATGGAG
++
+A9>?BA2@AA>5<<4<BA5.:)
+@HWI-252
+TGGAATGTAAAGAAGTATGGAG
++
+>A?:BB:BBA?9<;4?B>3-78
+@HWI-254
+TGGAATGTAAAGAAGTATGGAG
++
+B>A=B@1>B?47<?B9A?09?0
+@HWI-278
+TGGACGGAGAACTGATAAGGGC
++
+A54;A<3:?9A@A7<BA?755=
+@HWI-216
+TGGACGGAGAACTGATAAGGGCA
++
+?34@><6=A<B?@5=A?<30./;
+@HWI-307
+TGGACGGAGAACTGATAAGGGC
++
+B>??BB;@<@BABB@CC==:0?
+@HWI-332
+TGGACGGAGAACTGATAAGGGC
++
+B?8@A>B;BAB@BA?CB@A731
+@HWI-211
+TGGAATGTAAAGAAGTATGGAGA
++
+A61=BA;7B@BBA:9:B?;;@2=
+@HWI-333
+TGGAATGTAAAGAAGTATGGA
++
+BA>>AB8@BB?=>@98BBA>#
+@HWI-355
+TGGAATGTAAAGAAGTATGGAG
++
+B>8BBA7@CBA;>A2?BB2.>8
+@HWI-282
+TGGAATGTAAAGAATTATGGAG
++
+B>8>@A7@8=>>997-8A72##
+@HWI-378
+TGGAATGTAAAGAAGTATGGAG
++
+B7=;AA8=A@@5=;4>BB31:2
+@HWI-372
+AATGGCACTGGAAGAATTCACGGG
++
+BBA;@;>A@;@=?@:?B@######
+@HWI-388
+TGGAATGTAAAGAAGTATGGAG
++
+BA@?BB?BBB??A?0:BA53<>
+@HWI-399
+TGGAATGTAAAGAAGTATGGAG
++
+A;*>?B9@B<+1<@<4B@7)@-
+@HWI-109
+TGGACGGAGAACTGATAAGGGC
++
+BC@CBCC@CBB9@8?CCC>AA;
+@HWI-421
+TGGACGGAGAACTGATAAGGGC
++
+A66?A;0<@;B?A;?BB>38-7
+@HWI-412
+TGTGATGTGACGTAGTGGAA
++
+BA=?>B<>9><09?0=656=
+@HWI-432
+TGGACGGAGAACTGATAAGGGC
++
+A;;B>=;=2@?0@@>?CBA@2=
+@HWI-442
+TGGACGGAGAACTGATAAGGGC
++
+BBA?=;;>B6??@07AA#####
+@HWI-470
+TGGAATGTAAAGAAGTATGGAG
++
+B?3>AC>:AA?AA<)7BA;7=:
+@HWI-499
+TGGACGGAGAACTGATAAGGGC
++
+BA>??=:;B=B8A>>B9=/55=
+@HWI-100
+TGGACGGAGAACTGATAAGGGC
++
+A;6@@96@;?A@BBABC?3<5>
+@HWI-514
+TGACTAGATCCACACTCATTAA
++
+BAABC?7@C@ABABBBBABBBB
+@HWI-521
+TGGACGGAGAACTGATAAGGGC
++
+A2=A5=2=>@B;C>:7A>>:1.
+@HWI-545
+TGGACGGAGAACTGATAAGGGC
++
+BA?BBBAABAB@B>?BB@>A><
+@HWI-549
+TGGACGGAGAACTGATAAGGGC
++
+A66<;;0<>:@4A1<>>#####
+@HWI-431
+TATCACAGCCAGCTTTGAGGAG
++
+B>B?B?@A?69?38<@5902;9
+@HWI-579
+TGGAATGTAAAGAAGTATGGA
++
+BBB@A@BB>B=1<?>>BA###
+@HWI-583
+TGGACGGAGAACTGATAAGGGC
++
+BB:@BCB>>BBAB@>@BA8;00
+@HWI-587
+TAGCACCACATGATTCGGCT
++
+BA;/A;;A1BA:@BB>=4;?
+@HWI-516
+TGGACGGAGAACTGATAAGGGCA
++
+B??>>AA>A9A3A69B?;64779
+@HWI-113
+TGGACGGAGAACTGATAAGGGCA
++
+BBC@@;>BCBCBB*BCC@95**C
+@HWI-592
+TGGAATGTAAAGAAGTATGGAG
++
+BBBBBB?ABA;9@@2=B@)1=4
+@HWI-602
+TGGACGGAGAACTGATAAGGGC
++
+BA;>A@>=8=@?BB>B?:0906
+@HWI-608
+TGGACGGAGAACTGATAAGGGC
++
+BBC>CC9>??CAA9=B@=::36
+@HWI-590
+GGGAGCGAGACGGGGACTCACT
++
+=B=AAAA?CBAB@=A95=%:0;
+@HWI-612
+CGGCACATGTTGAAGTACACTCA
++
+B?>BBBCBABCCA=7@AAABBAA
+@HWI-610
+TCAGGTACCTGAAGTAGCGCGCG
++
+BB@;@2>;>?66>963#######
+@HWI-629
+TGGACGGAGAACTGATAAGGGCA
++
+A68@?;6<0<A=A99BB;'--8;
+@HWI-170
+TGGACGGAGAACTGATAAGGGC
++
+BABCB;;=;=B@BAACB@)80?
+@HWI-382
+TGGACGGAGAACTGATAAGGGCAT
++
+BB>C39>;B4B@CB73@@;<(>BB
+@HWI-648
+TGGACGGAGAACTGATAAGGGC
++
+A8=A?@9<A=A?A;=B>=275<
+@HWI-377
+TGGACGGAGAACTGATAAGGGCAA
++
+B>5>4?A?A;>0:9<=0;;:0.88
+@HWI-658
+TGGAATGTAAAGAAGTATGGAG
++
+B>8>AB9@B?AA>=;>AA82<8
+@HWI-677
+TGGAATGTAAAGAAGTATGG
++
+A:0<B>)<BB@2=>2=AB<4
+@HWI-684
+TGGAATGTAAAGAAGTATGGAG
++
+BAA?BBA@BA==?>8@B?02:)
+@HWI-699
+TGGACGGAGAACTGATAAGGGC
++
+B??B@>;<;>B?BB?AB>####
+@HWI-707
+TGGAATGTAAAGAAGAATGGAG
++
+BBAB9*===@7;;:(.,B??@;
+@HWI-710
+TGGACGGAGAACTGATAAGGGC
++
+BA?AA?8>A=B?BA>AA?####
+@HWI-700
+TGGACGGAGAACTGATAAGGGC
++
+BCB>4BB@8<A<A>=CB>130-
+@HWI-780
+TGGACGGAGAACTGATAAGGGC
++
+BBAABCAA<ABAB@?A>>0/29
+@HWI-766
+TGGACGGAGAACTGATAAGGGCAA
++
+A;3=C@-<A@CCA;@CC<00;@A@
+@HWI-621
+TAGGAACTTCATACCGTGCTCT
++
+BCBAA@ACBBBCB@9/:78?9A
+@HWI-798
+TGGACGGAGAACTGATAAGGGC
++
+6569=BB>>@B*B>@A=AB@A<
+@HWI-815
+TGGAATGTAAAGAAGTATGGAG
++
+B915AB;:?=>7<>9?CA55@*
+@HWI-839
+TGGACGGAGAACTGATAAGGGC
++
+A6;?A;;>A?@BB8=CB;583?
+@HWI-884
+TGGAATGTAAAGAAGTATGGAG
++
+BBBABBBBBB?BB>1/@BB?BB
+@HWI-897
+TGGAATGTAAAGAAGTATGGA
++
+BAA?BB?BBB>6@@=>BB70?
+@HWI-671
+TGACTAGATCCACACTCATTAC
++
+B@BBBA;ACBBCBBBBA>ABBB
+@HWI-901
+TGGACGGAGAACTGATAAGGGC
++
+B;7@@>;=@<@?A:>CB@;8:@
+@HWI-914
+TGGACGGAGAACTGATAAGGGC
++
+BBCCBCC:BBB@CCACBB<>AB
+@HWI-926
+TGGAATGTAAAGAAGTATGGAG
++
+BC=AAB;BBAB@B@+7BAA7BA
+@HWI-937
+TGGACGGAGAACTGATAAGGGC
++
+BBBBA4@>=;BAB=ACC@*8;<
+@HWI-973
+TGGAATGTAAAGAAGTATGGAG
++
+A59>BB;@BB?>>=3;BA71:5
+@HWI-242
+TGGACGGAGAACTGATAAGGGCTT
++
+A31@<05=;>@1B>>AA@:;5<BA
+@HWI-1006
+TGACTAGATCCACACTCATTA
++
+B@CBCBA?BB@ABBACCBACB
+@HWI-1002
+TGGACGGAGAACTGATAAGGGCAA
++
+B85@B?(99AB?A<?BBA2>9A9>
+@HWI-1008
+TGGACGGAGAACTGATAAGGGC
++
+A59=B=5=8@@>CA<9@=*,8>
+@HWI-568
+TGGACGGAGAACTGATAAGGGCAA
++
+A88@BA;>?@A?BA<BB=;817==
+@HWI-898
+TAAAGCTAGATTACCAAAGCAT
++
+B@@B===BBBBB@A;6<>0###
+@HWI-911
+CTGACTAGATCCACACTCATTA
++
+BA<AB6B<B?AAAB4*BB=?>C
+@HWI-1021
+TGGACGGAGAACTGATAAGGGC
++
+B79>?B;>:?B;B@<>6</6-7
+@HWI-1020
+TGGACGGAGAACTGATAAGGGC
++
+B;>?@>9>A=A=B;ACB>8;==
+@HWI-1022
+GGGAGCGAGACGGGGACTCACT
++
+AA@AC=B=79ABCBA1@B236A
+@HWI-1026
+TGGACGGAGAACTGATAAGGGCAAA
++
+B>?B>=AAB@?4B>>BBAAA>?A?A
+@HWI-1033
+TGGAATGTAAAGAAGTATGG
++
+?43>AA;@A=>89?><B@;4
+@HWI-1055
+TGGACGGAGAACTGATAAGGGC
++
+A>=CB;>>;BBACB@CC@3:+?
+@HWI-1059
+TGGACGGAGAACTGATAAGGGC
++
+BA?AABA@BBBABBBBBA980:
+@HWI-1060
+TGGACGGAGAACTGATAAGGGC
++
+B?>=BCBA<ACA@)<CB@*4;9
+@HWI-1067
+TGGACGGAGAACTGATAAGGGC
++
+BBBA@?B@B=A?B>;BA<56<=
+@HWI-1080
+TGGACGGAGAACTGATAAGGGC
++
+B@?@BCA@C<BBB?=CBB?A@?
+@HWI-1084
+TGGACGGAGAACTGATAAGGGC
++
+BCBCABB?BAC@BBBCBB@A9;
+@HWI-1087
+TGGAATGTAAAGAAGTATGGAG
++
+B;?@BB=@BB@4>A>ABA4.<7
+@HWI-1090
+CGGATGATGGTTCACAACGACC
++
+A;59A7?AA3AB>@B@B:2@<A
+@HWI-1093
+TAAGGAAATAGTAGCCGTGAT
++
+BAAA?BABBA1<A;?;#####
+@HWI-1066
+TGGACGGAGAACTGATAAGGGC
++
+BCCBBBCA>@>;>>=BA>@>##
+@HWI-1094
+TGGAATGTAAAGAAGTATGGAG
++
+BB:2@A:A@BA6A@=4B;5';;
+@HWI-1115
+AATGGCACTGGAAGAATTCACGGG
++
+BCBAB@B@>><>@;;A<;>#####
+@HWI-1120
+TGGAATGTAAAGAAGTATGGAG
++
+B67@B?2?BBA=A@09BA47?/
+@HWI-1122
+TGGACGGAGAACTGATAAGGGC
++
+B@5>;0;;B==3AB>BA=6:29
+@HWI-1127
+TGGACGGAGAACTGATAAGGGC
++
+A8<?B@3?8;BBA;>BA>1###
+@HWI-1128
+TGGAATGTAAAGAAGTATGGAG
++
+B??>BBC>>A9=@<5:B@36>A
+@HWI-1119
+TCAGGTACCTGAAGTAGCGCGCG
++
+BB?>??A>B=(6?7#########
+@HWI-1149
+TGGAATGTAAAGAAGTATGGAG
++
+BAA>@BB@B?<8<?A@@@95;3
+@HWI-1155
+TGGACGGAGAACTGATAAGGGC
++
+B?>?B<?>??BAB;;BB?2###
+@HWI-1159
+TGGACGGAGAACTGATAAGGGC
++
+BBBCBA<@;;BBBA@B@>36)?
+@HWI-1175
+TGGACGGAGAACTGATAAGGG
++
+A0)>B@69;4>@BB>BAA=A:
+@HWI-1174
+TGGACGGAGAACTGATAAGGGCAA
++
+B?>@A6@;@>@:?0:C@:######
+@HWI-1181
+TGGACGGAGAACTGATAAGGGC
++
+BCACC?BAABB?CBACCCAA?B
+@HWI-1196
+TGGACGGAGAACTGATAAGGGC
++
+B>ABBC=>;=BAABB?BB@<81
+@HWI-1197
+TGGACGGAGAACTGATAAGGGCA
++
+BBBB>?A>;@B;BB@B@909879
+@HWI-1201
+TGACTAGATCCACACTCATTAA
++
+A>CB@A?BBBBCBB>BB@CBA?
+@HWI-1206
+TGGACGGAGAACTGATAAGGGC
++
+B?ACBCC@BBBB@0?CB@;2;<
+@HWI-985
+AATGGCACTGGAAGAATTCACGGG
++
+BB@6@A>@:.:>@96A########
+@HWI-1236
+TGGACGGAGAACTGATAAGG
++
+BA<BB@>?AABBB>>BB@..
+@HWI-1245
+TGGAATGTAAAGAAGTATGGAGA
++
+BCBBBCB<=:?78/5ABBAAB9B
+@HWI-1266
+GGCGAACATGGATCTAGTGCACG
++
+?ABAABCB@=@?A?=:0>3=>##
+@HWI-1228
+TGGACGGAGAACTGATAAGGG
++
+<@:B@==?@>A@A>;BB?)06
+@HWI-1275
+TAAAGCTAGATTACCAAAGCAT
++
+BBBBBACA:BCBCBBBB@7;BB
+@HWI-1279
+TGGAATGTAAAGAAGTATGGAG
++
+BBBBABBABA@8=ABAAB>4>2
+@HWI-1282
+TGGAATGTAAAGAAGTATGGAG
++
+B?<BAC?BBB>8A@-5BB@:>=
+@HWI-1312
+TGGACGGAGAACTGATAAGGGC
++
+B:2<A@2@8;@A?2;BA<-02>
+@HWI-1317
+TGTGATGTGACGTAGTGGAA
++
+BB?B?BB?@<:0<;9>=<<:
+@HWI-1323
+TGGACGGAGAACTGATAAGGGC
++
+BAACB@@@BBBBCCBCC?;>5>
+@HWI-1304
+TGTGATGTGACGTAGTGGAA
++
+A92@?@1@7?:)9?>>708@
+@HWI-1337
+TGGACGGAGAACTGATAAGGGC
++
+B?BCA6A@BBC@CB>BCB;=;A
+@HWI-1344
+TGGAATGTAAAGAAGTATGGAG
++
+A86<BA4?B@=9=:07A?.080
+@HWI-1346
+TGGACGGAGAACTGATAAGGGC
++
+BBBCBCC=CBBBB;@BB@00;:
+@HWI-1348
+TGGAATGTAAAGAAGTATGGAG
++
+BB:BBC=BAB<:8?7@A@89A+
+@HWI-1352
+TGGACGGAGAACTGATAAGGGC
++
+B=7BB=6A<ACBB=ACCA2=6@
+@HWI-1368
+TGGAATGTAAAGAAGTATGGAG
++
+BCCBCCBBBB@@A??ABA:6@+
+@HWI-1383
+TGGACGGAGAACTGATAAGGGC
++
+BB@@A=AA?=BAB;=BB;//8=
+@HWI-1318
+TGTGATGTGACGTAGTGGAA
++
+BBAB@B@A?A>9>@9@47<A
+@HWI-1415
+TGGACGGAGAACTGATAAGGGCAA
++
+BCBCBCCB?BBACBBBB@25>5;8
+@HWI-1418
+TGGAATGTAAAGAAGTATGGAGT
++
+B?B@BCBBBC@8@@6=B>//>7A
+@HWI-1434
+TCAGGTACCTGAAGTAGCG
++
+B?6B>180?=)9AA3@###
+@HWI-1332
+TATCACAGCCATTTTGACGAGTT
++
+BCB@BABB=BBBBBA58;8>;@B
+@HWI-1447
+TGGACGGAGAACTGATAAGGGC
++
+BBBB6CBBCABBBCBCCC5A9=
+@HWI-1449
+TGGACGGAGAACTGATAAGGGC
++
+B;>@>=@?7@BA7BBC<6;###
+@HWI-1451
+TCAGGTACCTGAAGTAGCGCGCG
++
+BA@;@7<6:?91=##########
+@HWI-1460
+TGGAATGTAAAGAAGTATGGAG
++
+BCCBCCCBCB?:?A?>BA75;<
+@HWI-1375
+TGACTAGATTCACACTCATTA
++
+BCABCB?ACC>BA39C@5BC<
+@HWI-1481
+TGGAATGTAAAGAAGTATGGAG
++
+B6/9B@5ABBA??>048989<0
+@HWI-1489
+TGTGATGTGACGTAGTGGAA
++
+BAA8ABBBAB;+::1AB78B
+@HWI-1505
+TGGAATGTAAAGAAGTATGGA
++
+B?)=?A9<?7<>;A=9BB###
+@HWI-1508
+TGTGATGTGACGTAGTGGAA
++
+B>>A:=0?::3.960:####
+@HWI-1517
+TGGAATGTAAAGAAGTATGGAG
++
+B>>>BB>ABB=4==1=B>0)<0
+@HWI-1530
+TGGACGGAGAACTGATAAGGGCA
++
+BCBBBCB>BBBA@;?BA<><BA?
+@HWI-1532
+TGGACGGAGAACTGATAAGGGC
++
+B@6A>9;>:=B:A9>@>?3###
+@HWI-1534
+TGGACGGAGAACTGATAAGGGC
++
+B9;BAA>=A@B?A>@BB@><4<
+@HWI-1538
+TCAGGTACCTGAAGTAGCGCGCG
++
+BCCCBBBAA?):@@;;;:39###
+@HWI-1547
+TGGACGGAGAACTGATAAGG
++
+A6;@@A6<>?B<A<?BB@45
+@HWI-1556
+TGGACGGAGAACTGATAAGGGC
++
+BBBBBCCA?AA@CBAB??:@;>
+@HWI-1572
+TGGAATGTAAAGAAGTATGGAG
++
+BAA@BBA@@<>?@A=<AA>9?9
+@HWI-1583
+TGGACGGAGAACTGATAAGGGC
++
+B>?B?50??@C@BABBBA66)8
+@HWI-1592
+TGGACGGAGAACTGATAAGGGC
++
+BBB>BBB;?>@@CB@BBAAA>=
+@HWI-1522
+TGGACGGAGAACTGATAAGGGCAA
++
+A51BB?5?BABAA;ACA7=@;>@>
+@HWI-1602
+CGGCACATGTTGAAGTACACTCAA
++
+?@;AB>C@@;=1A;4=?A?BCAC=
+@HWI-1521
+TCAGGTACCTGAAGTAGCGCGCG
++
+BA?8?4>=?@>;:7;98######
+@HWI-1625
+TCAGGTACCTGAAGTAGCGCGCG
++
+BB@A@';9A;*=9?4<7994###
+@HWI-1631
+TGGAATGTAAAGAAGTATGGAG
++
+B>A>BB6@BA=8<<6=BA75>8
+@HWI-1632
+TGTGATGTGACGTAGTGGAA
++
+BA??;B>><=<4=<7?:599
+@HWI-1636
+TGGACGGAGAACTGATAAGGGCA
++
+A5=AAAA<B@B:BA=BBA=?:@>
+@HWI-1662
+TGGACGGAGAACTGATAAGGG
++
+A65<A@7<>=A@A7:B@=303
+@HWI-1666
+TGGACGGAGAACTGATAAGGGC
++
+BABCBCBBCACBA2<CB@??@>
+@HWI-1668
+TGGAATGTAAAGAAGTATGGA
++
+B6-:@A36>B?<A:.7<@###
+@HWI-1564
+CGGACGGTATATGGGTTAATATT
++
+BCBBCCBAACBB:<,7BA@BBCB
+@HWI-1673
+TGGACGGAGAACTGATAAGGGC
++
+BA8=@B>A?7B?BBBBB@6>;8
+@HWI-1683
+TGGACGGAGAACTGATAAGGGC
++
+BAABABB?B?@;B>@BBAAA=9
+@HWI-1690
+TGTGATGTGACGTAGTGGAA
++
+A;;>?A0?9>9549)=1589
+@HWI-1702
+AATGGCACTGGAAGAATTCACGGGT
++
+@BA>@;@=:9@;=95;<@;<9'.3<
+@HWI-1717
+AGGACGGGAAGGTGTCAACG
++
+AA?BAB?A<?@><?@:<;=9
+@HWI-1724
+TGGAATGTAAAGAAGTATGGAG
++
+B?<=AB@A@<>B><50<?:<>;
+@HWI-1728
+TGGAATGTAAAGAAGTATGGAG
++
+@@6@BB;@BC@>>A7;BB=:?)
+@HWI-1729
+CGAATAGCGTTGTGACTGA
++
+@B@CB@BB89ABBA?B8>?
+@HWI-1733
+TGGAATGTAAAGAAGTATGGAG
++
+BAABBBB?BB>0=>8@B=/,;(
+@HWI-1743
+TGTGATGTGACGTAGTGGAA
++
+BB>A9B+><<@>7;0=A><>
+@HWI-1744
+TGGACGGAGAACTGATAAGGGC
++
+B@6?BB;>B?B@BA?B=:6>;A
+@HWI-1754
+TCAGGTACCTGAAGTAGCGCGCG
++
+BBBA;;=1;;)940446-#####
+@HWI-1640
+TGGACGGAGAACTGATAAGGG
++
+B99@AB;<BAC?BBBBB@>@4
+@HWI-1769
+TCAGGTACCTGAAGTAGCGCGCG
++
+BCBA<=BCBC?BB>5A9;6:%:.
+@HWI-1773
+TGTGATGTGACGTAGTGGAAC
++
+BBAA>A>>;;73?:9?:2.34
+@HWI-1596
+TGGACGGAGAACTGATAAGGG
++
+BB@B@AA?A@B?B:>AB@943
+@HWI-1780
+TGGACGGAGAACTGATAAGGGCAA
++
+B;;BA@9?BAB?=-<@>@3:4>;?
+@HWI-1800
+TGGAATGTAAAGAAGTATGGAG
++
+BAB?BBA@B@:)9=AAB@34?0
+@HWI-1806
+TGGAATGTAAAGAAGTATGGA
++
+BBBBCCABCCA=AA<;CB@2>
+@HWI-1821
+TGGAATGTAAAGAAGTATGGAG
++
+B@8?AB;@BB>;>>;;@@:2=7
+@HWI-1766
+TGGACGGAGAACTGATAAGGGC
++
+BCACCCC?:C@7BAAACB6>2<
+@HWI-1830
+TGGACGGAGAACTGATAAGGGC
++
+BA@BB>@>>@B@A5>@B@866;
+@HWI-1846
+TGGACGGAGAACTGATAAGGGC
++
+BB?;5BA?@8B;>>@A9<####
+@HWI-1851
+TGGAATGTAAAGAAGTATGGA
++
+BBB@BBBABB@3??A@AB###
+@HWI-1856
+TGGAATGTAAAGAAGTATGGAG
++
+B@:=@B?BBB=:>@@>BB=:>;
+@HWI-1873
+TGGAATGTAAAGAAGTATGGAG
++
+BB@?BB@BBB???=>=B@2)<=
+@HWI-1876
+TGGACGGAGAACTGATAAGGGC
++
+A==B@?B;?>B=72;B==/<##
+@HWI-1885
+TGGAATGTAAAGAAGTATGGAG
++
+A74@BCBABAA:<?03BB:3=2
+@HWI-1888
+TGTGATGTGACGTAGTGGAA
++
+B@@:>>1<:62;?=7928=@
+@HWI-1889
+TGGACGGAGAACTGATAAGGGC
++
+A>;@B@;@=@CAB;@BB?330:
+@HWI-1894
+TAAGGAAATAGTAGCCGTGAT
++
+BAA>><AAA=<<>########
+@HWI-1912
+TGGACGGAGAACTGATAAGGG
++
+=<??67;8B>A4?A=)>1@?:
+@HWI-1914
+TAAGGAAATAGTAGCCGTGAT
++
+BBBB7BB>9B;=AA:A>####
+@HWI-1915
+TAGGAACTTCATACCGTGCTCT
++
+BC=9@BBCCBCBCBA*?3?BAB
+@HWI-1931
+TGGACGGAGAACTGATAAGGGCAA
++
+A6;@B=5@=@BBCB?B@A13)=9@
+@HWI-1940
+TGGAATGTAAAGAAGTATGGAG
++
+BC@BBBCBBAA@;?7<BA67?B
+@HWI-1939
+TGTGATGTGACGTAGTGGAA
++
+B?B??B<B??=8A=8>757:
+@HWI-1943
+TGTGATGTGACGTAGTGGAA
++
+BA<:<B<;5;94:<-<7###
+@HWI-1949
+TGTGATGTGACGTAGTGGAA
++
+BA?>7BB>>=?AA<9?>>5;
+@HWI-1955
+TGGAATGTAAAGAAGTATGGAG
++
+B?;>AB;>BA???>49BA92=2
+@HWI-1884
+TGGACGGAGAACTGATAAGGGC
++
+B==A=3<AB@B<A;AB@=5/5?
+@HWI-1973
+TGTGATGTGACGTAGTGGAAA
++
+A9><=B>;8877><9?:####
+@HWI-1983
+TGGACGGAGAACTGATAAGGGC
++
+B=1@B@6:B@B@BB=BBBA@;@
+@HWI-1980
+TGGACGGAGAACTGATAAGGGC
++
+BABBB=>>6?C@BBBBA=(52@
+@HWI-1992
+TGTGATGTGACGTAGTGGAA
++
+BA@9=@7@A?<>>?:@7:9>
+@HWI-1995
+CAAATTCGGTTCTAGAGAGGTT
++
+B<ABACBCA<C@A@==;=BBAB
+@HWI-2007
+TGGACGGAGAACTGATAAGGGT
++
+B?BBBA6ABBB?CBACB?AA?>
+@HWI-2011
+ACTGAATTCTCGTGGGTCTGCAT
++
+BAB==B?B@A@8;/.(:?A>:<?
+@HWI-2017
+TCAGGTACCTGAAGTAGCGCGCG
++
+BC@?727=B@5<<##########
+@HWI-2013
+AATTGCACTAGTCCCGGCCTG
++
+BCA>5;A=A>?=B@@4:?@8'
+@HWI-2040
+TGGACGGAGAACTGATAAGGGC
++
+BBBBB=BBB9BBB@?BB:88>=
+@HWI-2049
+TGGAATGTAAAGAAGTATGGAG
++
+BA>=ABAABB>5><0;@?.-94
+@HWI-2053
+TATCACAGCCATTTTGACGAGTT
++
+BCBBCBC>A@ABCBC@@=;</:C
+@HWI-2055
+TATTGCACTTGAGACGGCCTTA
++
+BBCC>BCBCBAA;<B</-=@2:
+@HWI-2104
+TGGACGGAGAACTGATAAGGGC
++
+A9@B@AA>A?A?BAACA=805;
+@HWI-2106
+TGGACGGAGAACTGATAAGGGCT
++
+A:5;BA8;A=A@?4;BA93-0:=
+@HWI-2100
+TCAGGTACCTGAAGTAGCGCGCGTTAT
++
+BB@579A@B<3;=07615830,6>=##
+@HWI-2072
+GGACGGAGAACTGATAAGGGCTCGG
++
+ABCBBA.3@@:A=@B=@@0<;@;.<
+@HWI-2124
+TGGACGGAGAACTGATAAGGGC
++
+?00@B@1>?AB@>9;B?>503?
+@HWI-2136
+TGGACGGAGAACTGATAAGGGC
++
+BBBBBBCABB@>BB?CBB?>>:
+@HWI-2137
+TGTGATGTGACGTAGTGGAA
++
+B>=<:<-149;8>8.93379
+@HWI-2141
+TGTGATGTGACGTAGTGGAAA
++
+BB@AAB<@8>=+=@1=/5=B?
+@HWI-2117
+CGGCACATGTTGAAGTACACTCA
++
+B>ABBABA>@?)=?0>CABABBB
+@HWI-2146
+TGGACGGAGAACTGATAAGGGC
++
+B;;BB4=ABBCBB8@CC;/5;B
+@HWI-2148
+TGGACGGAGAACTGATAAGGGC
++
+BBABA??@@AA:B@@AB@6=8=
+@HWI-2151
+TGGACGGAGAACTGATAAGGGC
++
+BCA@>BCB?3ABBA?B?A7@6B
+@HWI-2145
+TGTGATGTGACGTAGTGGAA
++
+B>?BBB>A>A;9<@1@559<
+@HWI-2158
+AATGGCACTGGAAGAATTCACGGG
++
+BCBABBBB?50=?8;A=>9>;###
+@HWI-2142
+TGGACGGAGAACTGATAAGGGC
++
+A4;@<@?=<@>@BB>BB8<0:7
+@HWI-2140
+TCAGGTACCTGAAGTAGCGCGCG
++
+BBABB?B=>=0<@>7<394:4:4
+@HWI-2191
+AATGGCACTGGAAGAATTCACGTG
++
+ABB?A@ABA;5;>8<??#######
+@HWI-2183
+TGGACGGAGAACTGATAAGGGC
++
+BAA?@>@>:;>=A>>BA:3107
+@HWI-2203
+TGGAATGTAAAGGAGTATGGAG
++
+BBABBC@?@A83?A>BBB>7?:
+@HWI-2200
+TCTTTGGTATTCTAGCTGTAGA
++
+BBCBCC@BACC?CB:AB5@###
+@HWI-2209
+TGGACGGAGAACTGATAAGGGC
++
+BAAA?A>>=;@;B;>@@>5/94
+@HWI-2218
+TGGAATGTAAAGAAGTATGGAG
++
+BC>BBCBBBC?A?@<>BB2);7
+@HWI-2222
+TGGACGGAGAACTGATAAGGGCA
++
+BCABBB?@BBCBBAABB@94;@@
+@HWI-2227
+TGGACGGAGAACTGATAAGGGC
++
+BBBBBBB?B@BAA;>BBA:0;>
+@HWI-2225
+AATGGCACTGGAAGAATTCACGG
++
+ABBA@?B@:6>;?@9@@B=?::?
+@HWI-2230
+TGGACGGAGAACTGATAAGGGC
++
+BBBB@CB@?>B?A;>=@>2/7;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.sorted.fa	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,500 @@
+>1
+>10
+>100
+>101
+>102
+>103
+>104
+>105
+>106
+>107
+>108
+>109
+>11
+>110
+>111
+>112
+>113
+>114
+>115
+>116
+>117
+>118
+>119
+>12
+>120
+>121
+>122
+>123
+>124
+>125
+>126
+>127
+>128
+>129
+>13
+>130
+>131
+>132
+>133
+>134
+>135
+>136
+>137
+>138
+>139
+>14
+>140
+>141
+>142
+>143
+>144
+>145
+>146
+>147
+>148
+>149
+>15
+>150
+>151
+>152
+>153
+>154
+>155
+>156
+>157
+>158
+>159
+>16
+>160
+>161
+>162
+>163
+>164
+>165
+>166
+>167
+>168
+>169
+>17
+>170
+>171
+>172
+>173
+>174
+>175
+>176
+>177
+>178
+>179
+>18
+>180
+>181
+>182
+>183
+>184
+>185
+>186
+>187
+>188
+>189
+>19
+>190
+>191
+>192
+>193
+>194
+>195
+>196
+>197
+>198
+>199
+>2
+>20
+>200
+>201
+>202
+>203
+>204
+>205
+>206
+>207
+>208
+>209
+>21
+>210
+>211
+>212
+>213
+>214
+>215
+>216
+>217
+>218
+>219
+>22
+>220
+>221
+>222
+>223
+>224
+>225
+>226
+>227
+>228
+>229
+>23
+>230
+>231
+>232
+>233
+>234
+>235
+>236
+>237
+>238
+>239
+>24
+>240
+>241
+>242
+>243
+>244
+>245
+>246
+>247
+>248
+>249
+>25
+>250
+>26
+>27
+>28
+>29
+>3
+>30
+>31
+>32
+>33
+>34
+>35
+>36
+>37
+>38
+>39
+>4
+>40
+>41
+>42
+>43
+>44
+>45
+>46
+>47
+>48
+>49
+>5
+>50
+>51
+>52
+>53
+>54
+>55
+>56
+>57
+>58
+>59
+>6
+>60
+>61
+>62
+>63
+>64
+>65
+>66
+>67
+>68
+>69
+>7
+>70
+>71
+>72
+>73
+>74
+>75
+>76
+>77
+>78
+>79
+>8
+>80
+>81
+>82
+>83
+>84
+>85
+>86
+>87
+>88
+>89
+>9
+>90
+>91
+>92
+>93
+>94
+>95
+>96
+>97
+>98
+>99
+AAAAAATATCTTTTTTAACTCGTGGCC
+AAAAACACACAGACACAAGCAGCAAT
+AAAACCCGGACAAACCATCGGAGGA
+AAAAGGAAAAACAGAAAAATTGGG
+AAAATCGACTGCCGAAAACATTTTAA
+AAAGACAAAAGAAATACAGGCACT
+AAATTGCAAAGATGGAAAATAAAACT
+AACAACTGCAAACATCTACCACA
+AACAGGAAAAACAGAAGGATTTCTA
+AACAGGGAGATCAACAGCGTTGACA
+AACATTTTATCAATTATACATTA
+AACGGGGAATAAGGGTTCG
+AACTCTCTAATTTAACTTTGTGC
+AACTTTAAATTTTTAATAACCTT
+AAGACAACAATGACATATAAGACG
+AAGATGGAGTAGTTTTTT
+AAGCACGCCTTACCACAATTTATAA
+AAGGAATTAAAGCAATAATTCTAA
+AAGTGAAGAAGTAGTTTTT
+AATATAAAAATACAATCAACCATTGCA
+AATCTACAATTTCCATTACGACTCC
+AATGACACACTCTTCATCAAC
+AATGTCACTTGAAGAATTCACGT
+AATTGCAACAGAGACTGGAA
+AATTTATTTAATTTATATTCTAACTAA
+ACAAATCATAAATTTTTTTTTACT
+ACAACCTCAACTCATATTT
+ACAGCAGGACGGTGATCA
+ACAGCAGGACGGTGATCA
+ACCAGCACCTTCCGACTCAACGTCAAA
+ACTAAACTTTTCTTACCATATTTCTA
+AGAACAATTAAATAAAATAGCATA
+AGATGTTGATCTAAACTCTCCCA
+AGGAATATGATGAAATAAAAAAAT
+AGGATTTTTAAGCCCATATGTTTCC
+ATAAAGCTAGATTACCAAAGCAT
+ATGAAATTCGAACAATACGTC
+ATGTTATTTACTTTTTCCCCTTATA
+ATTATTAATAAATTATTATAA
+ATTTTACTTCATCATTTTC
+CAAACGGAACAAGACATCACCATC
+CAAATAACAAACTGAATAAACGAAA
+CAACACATGACGCGACAATTCTTG
+CAAGAATACAAAAAATACTAATTA
+CAAGATATGAACAAAGCAAAGACAC
+CAAGGCTCAGAAGAACATCACCAAGACC
+CAATTTTTAATTCCTTTTTTCTTCTT
+CACAGACTGAGGCAGAAAAAACAA
+CACCGAACCGGGAAGGCGAACAAC
+CACGACTTTATTCTTTTTATCTCA
+CAGATTCACTGATTTTCTTACGCC
+CAGTTTCACAAAAGATCTTTTAA
+CATAATATAAACTTATCTT
+CATACCTACAAAAAAGCTTCTCTTAC
+CATACCTTAAATTATCTCTTTCTT
+CATTAATTCATCCATTTAAACTAA
+CCAGAAAACAATACAACATCCTCA
+CCATATATGACTGACTCATTTCAC
+CCCGAAAAGCCGAGGACGACTTA
+CCGACCGAGCAAATAAACACAGGAACG
+CCGCGATCTGCTTATTTATAATCTT
+CCTTACTCAACATACTTAATCATACTTA
+CGATATTTTCTCCTCGTACC
+CGATATTTTCTCCTCTGACC
+CGATATTTTCTCCTCTTACC
+CGATATTTTCTCCTCTTACCT
+CGCAACCAGCAGCAACTCCTAGCAT
+CGCCGCAAGATGAATACTCTAATGA
+CGGAAAAGAATGTAGACCATTTAA
+CGGACGGTATATTTTTTAATATAA
+CGGCACATGTTGAATTACACTCA
+CTAGACAAGATGCTATAAATTTTAAA
+CTATATTTTCTCTCTTACC
+CTGGAAACTATTGATCAAATT
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+GAAACAAACAACACATACCCTCTGGC
+GAAACCATTATCTTATCTTTATACA
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+GAACAATTTTTCAATTTTTTACATTA
+GAAGAGGAGGAGGAGTTTGTAAG
+GAATGATCGCACCACCACCTCAACGTT
+GACGAAACGCAACAACAAAATGGACG
+GAGAACTTTTAATCATTTTAC
+GAGAATAAATATTTCAATGGTCTATTG
+GAGGATTAAAAGAACGGTTTATAA
+GCAGATAGAAATCAATACAAAAATC
+GCCAACGACCATACCACGA
+GCCGGGGCGTGAGATGTCTGCATTA
+GGACGGAGAACTGATAAGGGCA
+GGACGGAGAACTGATAATGGC
+GGAGATTGTAGAACGAAAGGAAAAT
+GGTATCTTTATATTTTAATTTTCTT
+NATTCTTACTCCATTTCAATTTACT
+TAAAAATAATTGTCTTTAATTTCA
+TAAAATAAACCAAAACCCAAAAAT
+TAAAATAAATAAGTCCGACGACAA
+TAAAATTGTAATATTTAAATAATAT
+TAAAGAAGAAGAATTGATTTTAAT
+TAACAAATAGAACGTTCTAATTTAAA
+TAACATAAATTTTAATCATAAATTG
+TAACCTTGCAGAACTATACGATTCAAA
+TAACGGAGCACGAGAACGAAGTGG
+TAACTACTTTTACATTAATACTAA
+TAACTTTAACTTTTTTACT
+TAAGAAACTGAGCTAACGCAATGTACC
+TAAGTTTTTAATCATTTTTTTT
+TAATAAATTATTAAATAAAAAAAAAA
+TAATAAGAAACTGTTCAAACAATCCAC
+TAATAATTTAAATAAATATAAATTT
+TAATCAAAAAACTCTTCATTTTTA
+TAATTACCATTGCTAACTATCCA
+TACAAAAAATGCGAAAATTGACCCT
+TACAAAACAAACAAATTACAATCTAAA
+TACAAACGGAACTTTCGTCATAA
+TACAAACGGAACTTTCTTCATAACTTC
+TACAAACGTAATTTTCGCATAACATC
+TACAAATTTTTTTTTCTTTCTTAT
+TACAACTAACATCCTTTCTTCTTCC
+TACAAGACTAAAACAAACGTGAAGT
+TACACACTCATCAACCAAAGGACG
+TACACACTCATCAACCAAAGTACGTA
+TACACAGACTTACAAAACACATCCTTC
+TACACCTCTTTTTACTTTTTTATT
+TACAGACAACACATACGGACTTAA
+TACAGACAACACATACGGACTTAAGT
+TACAGAGAAATATACAACACTCACC
+TACCTCTTTATTAACCTCCACCTCTA
+TACTATATACTTCTTCAAATCACA
+TACTATTTTATTATACATACATACATTA
+TACTCAATAGAACTCTACTCACTCATA
+TACTGAAAACGGGCGCATATCAGTGG
+TACTGAAACAAGGAAACACAAGC
+TACTTTTTTCTTAATTTTTTATTAAAC
+TAGAACTCGAACCAGAGCTCC
+TAGACTTTCTACTCATTATTAC
+TAGCCTTTACTAGGCTTTTTCTAA
+TAGCGAGATGGACCAACGTGCTGT
+TAGGAACTTCATACCGGTCTC
+TAGGTACTTACCTTTTTTTTACACAA
+TAGTCATACATACCTAATTATACATA
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+TAGTTACCTTCATATCTCTCTTTA
+TAGTTTCACTACTTTATTCTTTTTA
+TAGTTTTAAATATTTCTTTTTTTC
+TATAATTTTATTTTATATTTTCTCT
+TATATAAATCTTCAACATCAA
+TATATTGCCTCCCCATAATCCTT
+TATCTATCTTTGATCTTCTTTTCA
+TATCTGATCAACAATCTTTTCCCAT
+TATGGAGAAACAGCGATATAAGTCA
+TATGGATTATTTCAAAATTTTTTTTT
+TATTATACATAGAATAACAAATCTTT
+TATTATCATCTCGTTCTTCCTTCTC
+TATTATTTTTTTATTCCATTCATAT
+TATTCAATCACTCCATTATATATAACA
+TATTCCGACAATACCTTCTTTAC
+TATTTATAAATTTTTTCTTGAGAC
+TATTTCACTTTATACTTCCTTAA
+TATTTCAGCAACAGACTAAGACTAA
+TATTTCCAACCTTCAACCTCAAATAA
+TATTTCTTTTTTAACTTCTTTTC
+TCAAAAAGTAATAGGGATCGTTA
+TCAAAGAACAATGTAAAGCCGCGAC
+TCAAATGCAAATTGGATTTATGA
+TCAACTGGCAAGAATTTTTGAAAATT
+TCAAGCCTTTTGAAGAACTGACCTAAA
+TCAGAAGAACAGAGAATTGATTTT
+TCAGAGTTCTACAGGTCCTACGATT
+TCAGTCTTTTTTTCTCTCCTA
+TCATTACACTTCTTACAAAAC
+TCCACAACAACTCTATCTAAAGCATT
+TCCACCTATTTATCTTTTCTT
+TCCCAACCCTCGAGCATCATTTTC
+TCCGAAAAATCGTAGGACCCGGGCA
+TCCGAAAACAAGGCCCGTCGCT
+TCCTGAGGACGAGGGGCGTTTAGC
+TCGCCGTAAAGCCAGTCGTTCTCC
+TCTAAACACCCACGAAAATCTCTTAC
+TCTAGTCTGAGCGTAGTACCAGATTG
+TCTATATTATTTTTATCAATTTTCACC
+TCTATTCATACAAAACACTAATACCC
+TCTATTTCTTTATTTTTTTTATTAT
+TCTCATCTTACAATTTTTTAAAACTT
+TCTCTTTTATTTTTATCTTTCCTT
+TCTGTATTTGACTTATTACTTTCTCC
+TCTTATTATCATTTTTTTATCCCTT
+TCTTATTTTAATCTTCCAATTTC
+TCTTCTATATAATCCTTTATTATAA
+TCTTTTTTTTAATACTTATTTTCATT
+TGAAACTGAAACTAACATACAAAATATT
+TGAAAGGAAAAACAGGACACGGGA
+TGACCAAAGACAAACAAACAATAAATA
+TGAGAATGACTTCTTCACGATCTCTT
+TGAGCGGAGAACCAGAGTTGATGAGC
+TGAGCTAGAACTGCACCCACTCCA
+TGAGGAAAACAGAAAAATGAGAGACA
+TGATGACGGGCAGCAGGGATTTTC
+TGATTTACTTACATTCTTTTTTT
+TGCTTGGACTACATATGGTTGAGGG
+TGCTTGGACTACATATGGTTGAGTG
+TGCTTGGACTACATATGGTTGAGTG
+TGCTTTTACTACATATTTTTTATTTTTTTA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGACAAGAACCACGCGACGGGTGT
+TGGACGGAGAACTGATAAGGGC
+TGGAGTAGCACAGTCGTCTGAAATC
+TGTGTAATCTTTCTACTTCTTCTAC
+TTAAACAATTTGGAATTAATT
+TTAAAGACGCAACAACTAACATT
+TTAAGTTTTAGACATAATCTATTACAA
+TTAATATGTAATTTCATACCTCAC
+TTAATGACACACGGGAAAAACACCG
+TTACAATCTACTATTCTTTTATTA
+TTACTAGATCCACCCTCATTA
+TTAGTATTAATCTTCACTTAA
+TTATAATCACGGCACCCTATACA
+TTATTACATCGTCCACATATAACAAAA
+TTATTATCTATTTTAATTTTTCTTAA
+TTCAAGTAGATTGCATTTTTTAATA
+TTCATTTTATTTTTAAATATCTTTTTT
+TTCTAGCACAACACGCACACATATA
+TTCTTCCTTTTATCCTCTCTTAA
+TTCTTGATAACGCATCTTCTACAT
+TTCTTGGACTACACATTTTTTATTGTTTTA
+TTCTTTGACTACATATTTTTTATT
+TTCTTTTACTACATATTGTTTATTTTTTTA
+TTCTTTTACTACATATTTTTTATTTTT
+TTCTTTTACTACATATTTTTTATTTTTTTA
+TTGATTCTTCTTTTTCACAAAA
+TTGTAAAACATTCTTTCTCCTGAC
+TTTAAACACTTCCTACATCAAATTTC
+TTTAAGACTTATGAGCTTG
+TTTACCAGAGGAGTCGAGTTTTT
+TTTATTACAACCCTATCTTACCTCAA
+TTTATTACTTAGTCATAATTCCAA
+TTTCCGACAAATACACCATCTTC
+TTTGATACCTTTATACCATACCTATT
+TTTGTTTTTCATTTTTTTATCTTT
+TTTGTTTTTTACTATATTT
+TTTTATATTTCCTTATATCTTTACTA
+TTTTCATTTCTTCTTCAAATCCTTT
+TTTTCTTTACCCATCTTTACTTTCCC
+TTTTCTTTTTTTCATTTTCTCTTTTA
+TTTTTAACTCATTTTACAATTAAAC
+TTTTTAACTCCCATCATTTTTCCTC
+TTTTTATCAATTTTCACCATTC
+TTTTTTATCAATTTTCACCATTCAT
+TTTTTTATCATTTTTCACCTAAAAAA
+TTTTTTTTTTGTTTTTATTTTTATCAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.faw	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,494 @@
+>1_2
+ACAGCAGGACGGTGATCA
+>2_2
+TGCTTGGACTACATATGGTTGAGTG
+>3_2
+TGGAATGTAAAGAAGTATGGAG
+>4_1
+AATTTATTTAATTTATATTCTAACTAA
+>5_1
+TTATAATCACGGCACCCTATACA
+>6_1
+TCCGAAAAATCGTAGGACCCGGGCA
+>7_1
+TCCCAACCCTCGAGCATCATTTTC
+>8_1
+TTGATTCTTCTTTTTCACAAAA
+>9_1
+AGAACAATTAAATAAAATAGCATA
+>10_1
+CCAGAAAACAATACAACATCCTCA
+>11_1
+TCTAGTCTGAGCGTAGTACCAGATTG
+>12_1
+TTTTTAACTCCCATCATTTTTCCTC
+>13_1
+GCAGATAGAAATCAATACAAAAATC
+>14_1
+AAGTGAAGAAGTAGTTTTT
+>15_1
+AATGTCACTTGAAGAATTCACGT
+>16_1
+TTTACCAGAGGAGTCGAGTTTTT
+>17_1
+GGTATCTTTATATTTTAATTTTCTT
+>18_1
+TTACTAGATCCACCCTCATTA
+>19_1
+TATATAAATCTTCAACATCAA
+>20_1
+GAAACCATTATCTTATCTTTATACA
+>21_1
+GGAGATTGTAGAACGAAAGGAAAAT
+>22_1
+ACTAAACTTTTCTTACCATATTTCTA
+>23_1
+TCAAGCCTTTTGAAGAACTGACCTAAA
+>24_1
+TAACATAAATTTTAATCATAAATTG
+>25_1
+TAGCGAGATGGACCAACGTGCTGT
+>26_1
+TACAAACGTAATTTTCGCATAACATC
+>27_1
+CGGAAAAGAATGTAGACCATTTAA
+>28_1
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+>29_1
+CAAGAATACAAAAAATACTAATTA
+>30_1
+CATACCTACAAAAAAGCTTCTCTTAC
+>31_1
+TAAAATAAATAAGTCCGACGACAA
+>32_1
+AGGAATATGATGAAATAAAAAAAT
+>33_1
+TTTTTTTTTTGTTTTTATTTTTATCAT
+>34_1
+TTTTTAACTCATTTTACAATTAAAC
+>35_1
+CCGCGATCTGCTTATTTATAATCTT
+>36_1
+TAGGTACTTACCTTTTTTTTACACAA
+>37_1
+TCATTACACTTCTTACAAAAC
+>38_1
+TATTATACATAGAATAACAAATCTTT
+>39_1
+ATGTTATTTACTTTTTCCCCTTATA
+>40_1
+TCCGAAAACAAGGCCCGTCGCT
+>41_1
+TGCTTTTACTACATATTTTTTATTTTTTTA
+>42_1
+GAACAATTTTTCAATTTTTTACATTA
+>43_1
+AACATTTTATCAATTATACATTA
+>44_1
+TATCTGATCAACAATCTTTTCCCAT
+>45_1
+AATTGCAACAGAGACTGGAA
+>46_1
+TATTCAATCACTCCATTATATATAACA
+>47_1
+CAAACGGAACAAGACATCACCATC
+>48_1
+TCTTCTATATAATCCTTTATTATAA
+>49_1
+TGATGACGGGCAGCAGGGATTTTC
+>50_1
+TTTATTACAACCCTATCTTACCTCAA
+>51_1
+AACAGGAAAAACAGAAGGATTTCTA
+>52_1
+CGATATTTTCTCCTCGTACC
+>53_1
+AAATTGCAAAGATGGAAAATAAAACT
+>54_1
+TACACACTCATCAACCAAAGGACG
+>55_1
+TATATTGCCTCCCCATAATCCTT
+>56_1
+TACAAACGGAACTTTCTTCATAACTTC
+>57_1
+GGACGGAGAACTGATAATGGC
+>58_1
+TTCTTTGACTACATATTTTTTATT
+>59_1
+TCTTTTTTTTAATACTTATTTTCATT
+>60_1
+TACTTTTTTCTTAATTTTTTATTAAAC
+>61_1
+AATATAAAAATACAATCAACCATTGCA
+>62_1
+CAGTTTCACAAAAGATCTTTTAA
+>63_1
+GAAACAAACAACACATACCCTCTGGC
+>64_1
+AATGACACACTCTTCATCAAC
+>65_1
+AAGATGGAGTAGTTTTTT
+>66_1
+ACAACCTCAACTCATATTT
+>67_1
+TTAAACAATTTGGAATTAATT
+>68_1
+TACAAAAAATGCGAAAATTGACCCT
+>69_1
+GGACGGAGAACTGATAAGGGCA
+>70_1
+TTAAGTTTTAGACATAATCTATTACAA
+>71_1
+TTATTATCTATTTTAATTTTTCTTAA
+>72_1
+CACCGAACCGGGAAGGCGAACAAC
+>73_1
+TCCACCTATTTATCTTTTCTT
+>74_1
+TCGCCGTAAAGCCAGTCGTTCTCC
+>75_1
+TCAAAGAACAATGTAAAGCCGCGAC
+>76_1
+TGGACAAGAACCACGCGACGGGTGT
+>77_1
+CCCGAAAAGCCGAGGACGACTTA
+>78_1
+TTTTTTATCATTTTTCACCTAAAAAA
+>79_1
+TAGAACTCGAACCAGAGCTCC
+>80_1
+TCTATATTATTTTTATCAATTTTCACC
+>81_1
+TCTATTTCTTTATTTTTTTTATTAT
+>82_1
+TTTGATACCTTTATACCATACCTATT
+>83_1
+ATAAAGCTAGATTACCAAAGCAT
+>84_1
+GCCAACGACCATACCACGA
+>85_1
+CGGCACATGTTGAATTACACTCA
+>86_1
+TACTATTTTATTATACATACATACATTA
+>87_1
+TTAATGACACACGGGAAAAACACCG
+>88_1
+TAGTTTCACTACTTTATTCTTTTTA
+>89_1
+AACAGGGAGATCAACAGCGTTGACA
+>90_1
+CGATATTTTCTCCTCTGACC
+>91_1
+ACCAGCACCTTCCGACTCAACGTCAAA
+>92_1
+AAGGAATTAAAGCAATAATTCTAA
+>93_1
+TACAAAACAAACAAATTACAATCTAAA
+>94_1
+CAATTTTTAATTCCTTTTTTCTTCTT
+>95_1
+TACAGACAACACATACGGACTTAA
+>96_1
+TCTGTATTTGACTTATTACTTTCTCC
+>97_1
+TGAGCTAGAACTGCACCCACTCCA
+>98_1
+CGCCGCAAGATGAATACTCTAATGA
+>99_1
+TATTTCTTTTTTAACTTCTTTTC
+>100_1
+TTACAATCTACTATTCTTTTATTA
+>101_1
+TTTAAACACTTCCTACATCAAATTTC
+>102_1
+TGTGTAATCTTTCTACTTCTTCTAC
+>103_1
+TCTATTCATACAAAACACTAATACCC
+>104_1
+TGGAGTAGCACAGTCGTCTGAAATC
+>105_1
+AAGCACGCCTTACCACAATTTATAA
+>106_1
+CTGGAAACTATTGATCAAATT
+>107_1
+TACACAGACTTACAAAACACATCCTTC
+>108_1
+TTCAAGTAGATTGCATTTTTTAATA
+>109_1
+TTATTACATCGTCCACATATAACAAAA
+>110_1
+CAAGGCTCAGAAGAACATCACCAAGACC
+>111_1
+TGAGGAAAACAGAAAAATGAGAGACA
+>112_1
+TCAAAAAGTAATAGGGATCGTTA
+>113_1
+TAACTTTAACTTTTTTACT
+>114_1
+TATTCCGACAATACCTTCTTTAC
+>115_1
+TTTGTTTTTTACTATATTT
+>116_1
+TTCATTTTATTTTTAAATATCTTTTTT
+>117_1
+TACTCAATAGAACTCTACTCACTCATA
+>118_1
+TGAAAGGAAAAACAGGACACGGGA
+>119_1
+AAAATCGACTGCCGAAAACATTTTAA
+>120_1
+TACAGAGAAATATACAACACTCACC
+>121_1
+TCAACTGGCAAGAATTTTTGAAAATT
+>122_1
+GAGAACTTTTAATCATTTTAC
+>123_1
+TATTATCATCTCGTTCTTCCTTCTC
+>124_1
+TTTTCATTTCTTCTTCAAATCCTTT
+>125_1
+TAGTCATACATACCTAATTATACATA
+>126_1
+ATTTTACTTCATCATTTTC
+>127_1
+TCTCTTTTATTTTTATCTTTCCTT
+>128_1
+GCCGGGGCGTGAGATGTCTGCATTA
+>129_1
+AGGATTTTTAAGCCCATATGTTTCC
+>130_1
+CAAGATATGAACAAAGCAAAGACAC
+>131_1
+CAACACATGACGCGACAATTCTTG
+>132_1
+CAAATAACAAACTGAATAAACGAAA
+>133_1
+TGAGAATGACTTCTTCACGATCTCTT
+>134_1
+TCTTATTATCATTTTTTTATCCCTT
+>135_1
+TCAAATGCAAATTGGATTTATGA
+>136_1
+CCTTACTCAACATACTTAATCATACTTA
+>137_1
+TAGACTTTCTACTCATTATTAC
+>138_1
+TGAAACTGAAACTAACATACAAAATATT
+>139_1
+AAAACCCGGACAAACCATCGGAGGA
+>140_1
+TACAGACAACACATACGGACTTAAGT
+>141_1
+TATTTCAGCAACAGACTAAGACTAA
+>142_1
+AACTTTAAATTTTTAATAACCTT
+>143_1
+TATTTATAAATTTTTTCTTGAGAC
+>144_1
+TTAATATGTAATTTCATACCTCAC
+>145_1
+CACAGACTGAGGCAGAAAAAACAA
+>146_1
+TAAAGAAGAAGAATTGATTTTAAT
+>147_1
+TACTGAAAACGGGCGCATATCAGTGG
+>148_1
+TCAGTCTTTTTTTCTCTCCTA
+>149_1
+TATAATTTTATTTTATATTTTCTCT
+>150_1
+NATTCTTACTCCATTTCAATTTACT
+>151_1
+TTGTAAAACATTCTTTCTCCTGAC
+>152_1
+TAATTACCATTGCTAACTATCCA
+>153_1
+TTCTTCCTTTTATCCTCTCTTAA
+>154_1
+TCTAAACACCCACGAAAATCTCTTAC
+>155_1
+AAAAACACACAGACACAAGCAGCAAT
+>156_1
+CGGACGGTATATTTTTTAATATAA
+>157_1
+TATGGAGAAACAGCGATATAAGTCA
+>158_1
+TACAACTAACATCCTTTCTTCTTCC
+>159_1
+AACTCTCTAATTTAACTTTGTGC
+>160_1
+TCCTGAGGACGAGGGGCGTTTAGC
+>161_1
+TATTTCCAACCTTCAACCTCAAATAA
+>162_1
+TGGACGGAGAACTGATAAGGGC
+>163_1
+TTTAAGACTTATGAGCTTG
+>164_1
+TTAAAGACGCAACAACTAACATT
+>165_1
+TAGGAACTTCATACCGGTCTC
+>166_1
+CGATATTTTCTCCTCTTACC
+>167_1
+GAGGATTAAAAGAACGGTTTATAA
+>168_1
+GAATGATCGCACCACCACCTCAACGTT
+>169_1
+TTTTCTTTACCCATCTTTACTTTCCC
+>170_1
+AAGACAACAATGACATATAAGACG
+>171_1
+TAATAATTTAAATAAATATAAATTT
+>172_1
+TACTGAAACAAGGAAACACAAGC
+>173_1
+TCAGAAGAACAGAGAATTGATTTT
+>174_1
+CATACCTTAAATTATCTCTTTCTT
+>175_1
+TTCTTTTACTACATATTTTTTATTTTT
+>176_1
+AAAAAATATCTTTTTTAACTCGTGGCC
+>177_1
+TAACAAATAGAACGTTCTAATTTAAA
+>178_1
+TAGTTACCTTCATATCTCTCTTTA
+>179_1
+TAAAATTGTAATATTTAAATAATAT
+>180_1
+AAAAGGAAAAACAGAAAAATTGGG
+>181_1
+AGATGTTGATCTAAACTCTCCCA
+>182_1
+TACCTCTTTATTAACCTCCACCTCTA
+>183_1
+TTTCCGACAAATACACCATCTTC
+>184_1
+ACAAATCATAAATTTTTTTTTACT
+>185_1
+GACGAAACGCAACAACAAAATGGACG
+>186_1
+TACAAATTTTTTTTTCTTTCTTAT
+>187_1
+TACACCTCTTTTTACTTTTTTATT
+>188_1
+TATGGATTATTTCAAAATTTTTTTTT
+>189_1
+TTCTAGCACAACACGCACACATATA
+>190_1
+TAACTACTTTTACATTAATACTAA
+>191_1
+TCTCATCTTACAATTTTTTAAAACTT
+>192_1
+TTCTTGGACTACACATTTTTTATTGTTTTA
+>193_1
+TACACACTCATCAACCAAAGTACGTA
+>194_1
+TACTATATACTTCTTCAAATCACA
+>195_1
+TCAGAGTTCTACAGGTCCTACGATT
+>196_1
+TGATTTACTTACATTCTTTTTTT
+>197_1
+CCATATATGACTGACTCATTTCAC
+>198_1
+GAAGAGGAGGAGGAGTTTGTAAG
+>199_1
+AAAGACAAAAGAAATACAGGCACT
+>200_1
+TACAAGACTAAAACAAACGTGAAGT
+>201_1
+TAACGGAGCACGAGAACGAAGTGG
+>202_1
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+>203_1
+TAATAAGAAACTGTTCAAACAATCCAC
+>204_1
+TGAGCGGAGAACCAGAGTTGATGAGC
+>205_1
+TATTATTTTTTTATTCCATTCATAT
+>206_1
+TTTATTACTTAGTCATAATTCCAA
+>207_1
+TTTTATATTTCCTTATATCTTTACTA
+>208_1
+AACGGGGAATAAGGGTTCG
+>209_1
+AATCTACAATTTCCATTACGACTCC
+>210_1
+CCGACCGAGCAAATAAACACAGGAACG
+>211_1
+TCCACAACAACTCTATCTAAAGCATT
+>212_1
+TTCTTGATAACGCATCTTCTACAT
+>213_1
+TGCTTGGACTACATATGGTTGAGGG
+>214_1
+CAGATTCACTGATTTTCTTACGCC
+>215_1
+TTTGTTTTTCATTTTTTTATCTTT
+>216_1
+CTATATTTTCTCTCTTACC
+>217_1
+TAACCTTGCAGAACTATACGATTCAAA
+>218_1
+TAAGAAACTGAGCTAACGCAATGTACC
+>219_1
+TTCTTTTACTACATATTTTTTATTTTTTTA
+>220_1
+TATCTATCTTTGATCTTCTTTTCA
+>221_1
+TAATAAATTATTAAATAAAAAAAAAA
+>222_1
+TTTTTTATCAATTTTCACCATTCAT
+>223_1
+TATTTCACTTTATACTTCCTTAA
+>224_1
+TAGTTTTAAATATTTCTTTTTTTC
+>225_1
+TTCTTTTACTACATATTGTTTATTTTTTTA
+>226_1
+GAGAATAAATATTTCAATGGTCTATTG
+>227_1
+CGATATTTTCTCCTCTTACCT
+>228_1
+CACGACTTTATTCTTTTTATCTCA
+>229_1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+>230_1
+CATAATATAAACTTATCTT
+>231_1
+ATGAAATTCGAACAATACGTC
+>232_1
+AACAACTGCAAACATCTACCACA
+>233_1
+TAAAAATAATTGTCTTTAATTTCA
+>234_1
+CGCAACCAGCAGCAACTCCTAGCAT
+>235_1
+ATTATTAATAAATTATTATAA
+>236_1
+CATTAATTCATCCATTTAAACTAA
+>237_1
+TCTTATTTTAATCTTCCAATTTC
+>238_1
+CTAGACAAGATGCTATAAATTTTAAA
+>239_1
+TGACCAAAGACAAACAAACAATAAATA
+>240_1
+TTTTTATCAATTTTCACCATTC
+>241_1
+TAAGTTTTTAATCATTTTTTTT
+>242_1
+TAATCAAAAAACTCTTCATTTTTA
+>243_1
+TACAAACGGAACTTTCGTCATAA
+>244_1
+TTTTCTTTTTTTCATTTTCTCTTTTA
+>245_1
+TAGCCTTTACTAGGCTTTTTCTAA
+>246_1
+TTAGTATTAATCTTCACTTAA
+>247_1
+TAAAATAAACCAAAACCCAAAAAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.sorted.faw	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,494 @@
+>100_1
+>101_1
+>102_1
+>103_1
+>104_1
+>105_1
+>106_1
+>107_1
+>108_1
+>109_1
+>10_1
+>110_1
+>111_1
+>112_1
+>113_1
+>114_1
+>115_1
+>116_1
+>117_1
+>118_1
+>119_1
+>11_1
+>120_1
+>121_1
+>122_1
+>123_1
+>124_1
+>125_1
+>126_1
+>127_1
+>128_1
+>129_1
+>12_1
+>130_1
+>131_1
+>132_1
+>133_1
+>134_1
+>135_1
+>136_1
+>137_1
+>138_1
+>139_1
+>13_1
+>140_1
+>141_1
+>142_1
+>143_1
+>144_1
+>145_1
+>146_1
+>147_1
+>148_1
+>149_1
+>14_1
+>150_1
+>151_1
+>152_1
+>153_1
+>154_1
+>155_1
+>156_1
+>157_1
+>158_1
+>159_1
+>15_1
+>160_1
+>161_1
+>162_1
+>163_1
+>164_1
+>165_1
+>166_1
+>167_1
+>168_1
+>169_1
+>16_1
+>170_1
+>171_1
+>172_1
+>173_1
+>174_1
+>175_1
+>176_1
+>177_1
+>178_1
+>179_1
+>17_1
+>180_1
+>181_1
+>182_1
+>183_1
+>184_1
+>185_1
+>186_1
+>187_1
+>188_1
+>189_1
+>18_1
+>190_1
+>191_1
+>192_1
+>193_1
+>194_1
+>195_1
+>196_1
+>197_1
+>198_1
+>199_1
+>19_1
+>1_2
+>200_1
+>201_1
+>202_1
+>203_1
+>204_1
+>205_1
+>206_1
+>207_1
+>208_1
+>209_1
+>20_1
+>210_1
+>211_1
+>212_1
+>213_1
+>214_1
+>215_1
+>216_1
+>217_1
+>218_1
+>219_1
+>21_1
+>220_1
+>221_1
+>222_1
+>223_1
+>224_1
+>225_1
+>226_1
+>227_1
+>228_1
+>229_1
+>22_1
+>230_1
+>231_1
+>232_1
+>233_1
+>234_1
+>235_1
+>236_1
+>237_1
+>238_1
+>239_1
+>23_1
+>240_1
+>241_1
+>242_1
+>243_1
+>244_1
+>245_1
+>246_1
+>247_1
+>24_1
+>25_1
+>26_1
+>27_1
+>28_1
+>29_1
+>2_2
+>30_1
+>31_1
+>32_1
+>33_1
+>34_1
+>35_1
+>36_1
+>37_1
+>38_1
+>39_1
+>3_2
+>40_1
+>41_1
+>42_1
+>43_1
+>44_1
+>45_1
+>46_1
+>47_1
+>48_1
+>49_1
+>4_1
+>50_1
+>51_1
+>52_1
+>53_1
+>54_1
+>55_1
+>56_1
+>57_1
+>58_1
+>59_1
+>5_1
+>60_1
+>61_1
+>62_1
+>63_1
+>64_1
+>65_1
+>66_1
+>67_1
+>68_1
+>69_1
+>6_1
+>70_1
+>71_1
+>72_1
+>73_1
+>74_1
+>75_1
+>76_1
+>77_1
+>78_1
+>79_1
+>7_1
+>80_1
+>81_1
+>82_1
+>83_1
+>84_1
+>85_1
+>86_1
+>87_1
+>88_1
+>89_1
+>8_1
+>90_1
+>91_1
+>92_1
+>93_1
+>94_1
+>95_1
+>96_1
+>97_1
+>98_1
+>99_1
+>9_1
+AAAAAATATCTTTTTTAACTCGTGGCC
+AAAAACACACAGACACAAGCAGCAAT
+AAAACCCGGACAAACCATCGGAGGA
+AAAAGGAAAAACAGAAAAATTGGG
+AAAATCGACTGCCGAAAACATTTTAA
+AAAGACAAAAGAAATACAGGCACT
+AAATTGCAAAGATGGAAAATAAAACT
+AACAACTGCAAACATCTACCACA
+AACAGGAAAAACAGAAGGATTTCTA
+AACAGGGAGATCAACAGCGTTGACA
+AACATTTTATCAATTATACATTA
+AACGGGGAATAAGGGTTCG
+AACTCTCTAATTTAACTTTGTGC
+AACTTTAAATTTTTAATAACCTT
+AAGACAACAATGACATATAAGACG
+AAGATGGAGTAGTTTTTT
+AAGCACGCCTTACCACAATTTATAA
+AAGGAATTAAAGCAATAATTCTAA
+AAGTGAAGAAGTAGTTTTT
+AATATAAAAATACAATCAACCATTGCA
+AATCTACAATTTCCATTACGACTCC
+AATGACACACTCTTCATCAAC
+AATGTCACTTGAAGAATTCACGT
+AATTGCAACAGAGACTGGAA
+AATTTATTTAATTTATATTCTAACTAA
+ACAAATCATAAATTTTTTTTTACT
+ACAACCTCAACTCATATTT
+ACAGCAGGACGGTGATCA
+ACCAGCACCTTCCGACTCAACGTCAAA
+ACTAAACTTTTCTTACCATATTTCTA
+AGAACAATTAAATAAAATAGCATA
+AGATGTTGATCTAAACTCTCCCA
+AGGAATATGATGAAATAAAAAAAT
+AGGATTTTTAAGCCCATATGTTTCC
+ATAAAGCTAGATTACCAAAGCAT
+ATGAAATTCGAACAATACGTC
+ATGTTATTTACTTTTTCCCCTTATA
+ATTATTAATAAATTATTATAA
+ATTTTACTTCATCATTTTC
+CAAACGGAACAAGACATCACCATC
+CAAATAACAAACTGAATAAACGAAA
+CAACACATGACGCGACAATTCTTG
+CAAGAATACAAAAAATACTAATTA
+CAAGATATGAACAAAGCAAAGACAC
+CAAGGCTCAGAAGAACATCACCAAGACC
+CAATTTTTAATTCCTTTTTTCTTCTT
+CACAGACTGAGGCAGAAAAAACAA
+CACCGAACCGGGAAGGCGAACAAC
+CACGACTTTATTCTTTTTATCTCA
+CAGATTCACTGATTTTCTTACGCC
+CAGTTTCACAAAAGATCTTTTAA
+CATAATATAAACTTATCTT
+CATACCTACAAAAAAGCTTCTCTTAC
+CATACCTTAAATTATCTCTTTCTT
+CATTAATTCATCCATTTAAACTAA
+CCAGAAAACAATACAACATCCTCA
+CCATATATGACTGACTCATTTCAC
+CCCGAAAAGCCGAGGACGACTTA
+CCGACCGAGCAAATAAACACAGGAACG
+CCGCGATCTGCTTATTTATAATCTT
+CCTTACTCAACATACTTAATCATACTTA
+CGATATTTTCTCCTCGTACC
+CGATATTTTCTCCTCTGACC
+CGATATTTTCTCCTCTTACC
+CGATATTTTCTCCTCTTACCT
+CGCAACCAGCAGCAACTCCTAGCAT
+CGCCGCAAGATGAATACTCTAATGA
+CGGAAAAGAATGTAGACCATTTAA
+CGGACGGTATATTTTTTAATATAA
+CGGCACATGTTGAATTACACTCA
+CTAGACAAGATGCTATAAATTTTAAA
+CTATATTTTCTCTCTTACC
+CTGGAAACTATTGATCAAATT
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+GAAACAAACAACACATACCCTCTGGC
+GAAACCATTATCTTATCTTTATACA
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+GAACAATTTTTCAATTTTTTACATTA
+GAAGAGGAGGAGGAGTTTGTAAG
+GAATGATCGCACCACCACCTCAACGTT
+GACGAAACGCAACAACAAAATGGACG
+GAGAACTTTTAATCATTTTAC
+GAGAATAAATATTTCAATGGTCTATTG
+GAGGATTAAAAGAACGGTTTATAA
+GCAGATAGAAATCAATACAAAAATC
+GCCAACGACCATACCACGA
+GCCGGGGCGTGAGATGTCTGCATTA
+GGACGGAGAACTGATAAGGGCA
+GGACGGAGAACTGATAATGGC
+GGAGATTGTAGAACGAAAGGAAAAT
+GGTATCTTTATATTTTAATTTTCTT
+NATTCTTACTCCATTTCAATTTACT
+TAAAAATAATTGTCTTTAATTTCA
+TAAAATAAACCAAAACCCAAAAAT
+TAAAATAAATAAGTCCGACGACAA
+TAAAATTGTAATATTTAAATAATAT
+TAAAGAAGAAGAATTGATTTTAAT
+TAACAAATAGAACGTTCTAATTTAAA
+TAACATAAATTTTAATCATAAATTG
+TAACCTTGCAGAACTATACGATTCAAA
+TAACGGAGCACGAGAACGAAGTGG
+TAACTACTTTTACATTAATACTAA
+TAACTTTAACTTTTTTACT
+TAAGAAACTGAGCTAACGCAATGTACC
+TAAGTTTTTAATCATTTTTTTT
+TAATAAATTATTAAATAAAAAAAAAA
+TAATAAGAAACTGTTCAAACAATCCAC
+TAATAATTTAAATAAATATAAATTT
+TAATCAAAAAACTCTTCATTTTTA
+TAATTACCATTGCTAACTATCCA
+TACAAAAAATGCGAAAATTGACCCT
+TACAAAACAAACAAATTACAATCTAAA
+TACAAACGGAACTTTCGTCATAA
+TACAAACGGAACTTTCTTCATAACTTC
+TACAAACGTAATTTTCGCATAACATC
+TACAAATTTTTTTTTCTTTCTTAT
+TACAACTAACATCCTTTCTTCTTCC
+TACAAGACTAAAACAAACGTGAAGT
+TACACACTCATCAACCAAAGGACG
+TACACACTCATCAACCAAAGTACGTA
+TACACAGACTTACAAAACACATCCTTC
+TACACCTCTTTTTACTTTTTTATT
+TACAGACAACACATACGGACTTAA
+TACAGACAACACATACGGACTTAAGT
+TACAGAGAAATATACAACACTCACC
+TACCTCTTTATTAACCTCCACCTCTA
+TACTATATACTTCTTCAAATCACA
+TACTATTTTATTATACATACATACATTA
+TACTCAATAGAACTCTACTCACTCATA
+TACTGAAAACGGGCGCATATCAGTGG
+TACTGAAACAAGGAAACACAAGC
+TACTTTTTTCTTAATTTTTTATTAAAC
+TAGAACTCGAACCAGAGCTCC
+TAGACTTTCTACTCATTATTAC
+TAGCCTTTACTAGGCTTTTTCTAA
+TAGCGAGATGGACCAACGTGCTGT
+TAGGAACTTCATACCGGTCTC
+TAGGTACTTACCTTTTTTTTACACAA
+TAGTCATACATACCTAATTATACATA
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+TAGTTACCTTCATATCTCTCTTTA
+TAGTTTCACTACTTTATTCTTTTTA
+TAGTTTTAAATATTTCTTTTTTTC
+TATAATTTTATTTTATATTTTCTCT
+TATATAAATCTTCAACATCAA
+TATATTGCCTCCCCATAATCCTT
+TATCTATCTTTGATCTTCTTTTCA
+TATCTGATCAACAATCTTTTCCCAT
+TATGGAGAAACAGCGATATAAGTCA
+TATGGATTATTTCAAAATTTTTTTTT
+TATTATACATAGAATAACAAATCTTT
+TATTATCATCTCGTTCTTCCTTCTC
+TATTATTTTTTTATTCCATTCATAT
+TATTCAATCACTCCATTATATATAACA
+TATTCCGACAATACCTTCTTTAC
+TATTTATAAATTTTTTCTTGAGAC
+TATTTCACTTTATACTTCCTTAA
+TATTTCAGCAACAGACTAAGACTAA
+TATTTCCAACCTTCAACCTCAAATAA
+TATTTCTTTTTTAACTTCTTTTC
+TCAAAAAGTAATAGGGATCGTTA
+TCAAAGAACAATGTAAAGCCGCGAC
+TCAAATGCAAATTGGATTTATGA
+TCAACTGGCAAGAATTTTTGAAAATT
+TCAAGCCTTTTGAAGAACTGACCTAAA
+TCAGAAGAACAGAGAATTGATTTT
+TCAGAGTTCTACAGGTCCTACGATT
+TCAGTCTTTTTTTCTCTCCTA
+TCATTACACTTCTTACAAAAC
+TCCACAACAACTCTATCTAAAGCATT
+TCCACCTATTTATCTTTTCTT
+TCCCAACCCTCGAGCATCATTTTC
+TCCGAAAAATCGTAGGACCCGGGCA
+TCCGAAAACAAGGCCCGTCGCT
+TCCTGAGGACGAGGGGCGTTTAGC
+TCGCCGTAAAGCCAGTCGTTCTCC
+TCTAAACACCCACGAAAATCTCTTAC
+TCTAGTCTGAGCGTAGTACCAGATTG
+TCTATATTATTTTTATCAATTTTCACC
+TCTATTCATACAAAACACTAATACCC
+TCTATTTCTTTATTTTTTTTATTAT
+TCTCATCTTACAATTTTTTAAAACTT
+TCTCTTTTATTTTTATCTTTCCTT
+TCTGTATTTGACTTATTACTTTCTCC
+TCTTATTATCATTTTTTTATCCCTT
+TCTTATTTTAATCTTCCAATTTC
+TCTTCTATATAATCCTTTATTATAA
+TCTTTTTTTTAATACTTATTTTCATT
+TGAAACTGAAACTAACATACAAAATATT
+TGAAAGGAAAAACAGGACACGGGA
+TGACCAAAGACAAACAAACAATAAATA
+TGAGAATGACTTCTTCACGATCTCTT
+TGAGCGGAGAACCAGAGTTGATGAGC
+TGAGCTAGAACTGCACCCACTCCA
+TGAGGAAAACAGAAAAATGAGAGACA
+TGATGACGGGCAGCAGGGATTTTC
+TGATTTACTTACATTCTTTTTTT
+TGCTTGGACTACATATGGTTGAGGG
+TGCTTGGACTACATATGGTTGAGTG
+TGCTTTTACTACATATTTTTTATTTTTTTA
+TGGAATGTAAAGAAGTATGGAG
+TGGACAAGAACCACGCGACGGGTGT
+TGGACGGAGAACTGATAAGGGC
+TGGAGTAGCACAGTCGTCTGAAATC
+TGTGTAATCTTTCTACTTCTTCTAC
+TTAAACAATTTGGAATTAATT
+TTAAAGACGCAACAACTAACATT
+TTAAGTTTTAGACATAATCTATTACAA
+TTAATATGTAATTTCATACCTCAC
+TTAATGACACACGGGAAAAACACCG
+TTACAATCTACTATTCTTTTATTA
+TTACTAGATCCACCCTCATTA
+TTAGTATTAATCTTCACTTAA
+TTATAATCACGGCACCCTATACA
+TTATTACATCGTCCACATATAACAAAA
+TTATTATCTATTTTAATTTTTCTTAA
+TTCAAGTAGATTGCATTTTTTAATA
+TTCATTTTATTTTTAAATATCTTTTTT
+TTCTAGCACAACACGCACACATATA
+TTCTTCCTTTTATCCTCTCTTAA
+TTCTTGATAACGCATCTTCTACAT
+TTCTTGGACTACACATTTTTTATTGTTTTA
+TTCTTTGACTACATATTTTTTATT
+TTCTTTTACTACATATTGTTTATTTTTTTA
+TTCTTTTACTACATATTTTTTATTTTT
+TTCTTTTACTACATATTTTTTATTTTTTTA
+TTGATTCTTCTTTTTCACAAAA
+TTGTAAAACATTCTTTCTCCTGAC
+TTTAAACACTTCCTACATCAAATTTC
+TTTAAGACTTATGAGCTTG
+TTTACCAGAGGAGTCGAGTTTTT
+TTTATTACAACCCTATCTTACCTCAA
+TTTATTACTTAGTCATAATTCCAA
+TTTCCGACAAATACACCATCTTC
+TTTGATACCTTTATACCATACCTATT
+TTTGTTTTTCATTTTTTTATCTTT
+TTTGTTTTTTACTATATTT
+TTTTATATTTCCTTATATCTTTACTA
+TTTTCATTTCTTCTTCAAATCCTTT
+TTTTCTTTACCCATCTTTACTTTCCC
+TTTTCTTTTTTTCATTTTCTCTTTTA
+TTTTTAACTCATTTTACAATTAAAC
+TTTTTAACTCCCATCATTTTTCCTC
+TTTTTATCAATTTTCACCATTC
+TTTTTTATCAATTTTCACCATTCAT
+TTTTTTATCATTTTTCACCTAAAAAA
+TTTTTTTTTTGTTTTTATTTTTATCAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.sorted.tab	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,247 @@
+AAAAAATATCTTTTTTAACTCGTGGCC	1
+AAAAACACACAGACACAAGCAGCAAT	1
+AAAACCCGGACAAACCATCGGAGGA	1
+AAAAGGAAAAACAGAAAAATTGGG	1
+AAAATCGACTGCCGAAAACATTTTAA	1
+AAAGACAAAAGAAATACAGGCACT	1
+AAATTGCAAAGATGGAAAATAAAACT	1
+AACAACTGCAAACATCTACCACA	1
+AACAGGAAAAACAGAAGGATTTCTA	1
+AACAGGGAGATCAACAGCGTTGACA	1
+AACATTTTATCAATTATACATTA	1
+AACGGGGAATAAGGGTTCG	1
+AACTCTCTAATTTAACTTTGTGC	1
+AACTTTAAATTTTTAATAACCTT	1
+AAGACAACAATGACATATAAGACG	1
+AAGATGGAGTAGTTTTTT	1
+AAGCACGCCTTACCACAATTTATAA	1
+AAGGAATTAAAGCAATAATTCTAA	1
+AAGTGAAGAAGTAGTTTTT	1
+AATATAAAAATACAATCAACCATTGCA	1
+AATCTACAATTTCCATTACGACTCC	1
+AATGACACACTCTTCATCAAC	1
+AATGTCACTTGAAGAATTCACGT	1
+AATTGCAACAGAGACTGGAA	1
+AATTTATTTAATTTATATTCTAACTAA	1
+ACAAATCATAAATTTTTTTTTACT	1
+ACAACCTCAACTCATATTT	1
+ACAGCAGGACGGTGATCA	2
+ACCAGCACCTTCCGACTCAACGTCAAA	1
+ACTAAACTTTTCTTACCATATTTCTA	1
+AGAACAATTAAATAAAATAGCATA	1
+AGATGTTGATCTAAACTCTCCCA	1
+AGGAATATGATGAAATAAAAAAAT	1
+AGGATTTTTAAGCCCATATGTTTCC	1
+ATAAAGCTAGATTACCAAAGCAT	1
+ATGAAATTCGAACAATACGTC	1
+ATGTTATTTACTTTTTCCCCTTATA	1
+ATTATTAATAAATTATTATAA	1
+ATTTTACTTCATCATTTTC	1
+CAAACGGAACAAGACATCACCATC	1
+CAAATAACAAACTGAATAAACGAAA	1
+CAACACATGACGCGACAATTCTTG	1
+CAAGAATACAAAAAATACTAATTA	1
+CAAGATATGAACAAAGCAAAGACAC	1
+CAAGGCTCAGAAGAACATCACCAAGACC	1
+CAATTTTTAATTCCTTTTTTCTTCTT	1
+CACAGACTGAGGCAGAAAAAACAA	1
+CACCGAACCGGGAAGGCGAACAAC	1
+CACGACTTTATTCTTTTTATCTCA	1
+CAGATTCACTGATTTTCTTACGCC	1
+CAGTTTCACAAAAGATCTTTTAA	1
+CATAATATAAACTTATCTT	1
+CATACCTACAAAAAAGCTTCTCTTAC	1
+CATACCTTAAATTATCTCTTTCTT	1
+CATTAATTCATCCATTTAAACTAA	1
+CCAGAAAACAATACAACATCCTCA	1
+CCATATATGACTGACTCATTTCAC	1
+CCCGAAAAGCCGAGGACGACTTA	1
+CCGACCGAGCAAATAAACACAGGAACG	1
+CCGCGATCTGCTTATTTATAATCTT	1
+CCTTACTCAACATACTTAATCATACTTA	1
+CGATATTTTCTCCTCGTACC	1
+CGATATTTTCTCCTCTGACC	1
+CGATATTTTCTCCTCTTACC	1
+CGATATTTTCTCCTCTTACCT	1
+CGCAACCAGCAGCAACTCCTAGCAT	1
+CGCCGCAAGATGAATACTCTAATGA	1
+CGGAAAAGAATGTAGACCATTTAA	1
+CGGACGGTATATTTTTTAATATAA	1
+CGGCACATGTTGAATTACACTCA	1
+CTAGACAAGATGCTATAAATTTTAAA	1
+CTATATTTTCTCTCTTACC	1
+CTGGAAACTATTGATCAAATT	1
+CTTCTTTTACTACATATTTTTTATTTTTTTA	1
+GAAACAAACAACACATACCCTCTGGC	1
+GAAACCATTATCTTATCTTTATACA	1
+GAAAGGAAGGGAAGAAAGCGAAAGGA	1
+GAACAATTTTTCAATTTTTTACATTA	1
+GAAGAGGAGGAGGAGTTTGTAAG	1
+GAATGATCGCACCACCACCTCAACGTT	1
+GACGAAACGCAACAACAAAATGGACG	1
+GAGAACTTTTAATCATTTTAC	1
+GAGAATAAATATTTCAATGGTCTATTG	1
+GAGGATTAAAAGAACGGTTTATAA	1
+GCAGATAGAAATCAATACAAAAATC	1
+GCCAACGACCATACCACGA	1
+GCCGGGGCGTGAGATGTCTGCATTA	1
+GGACGGAGAACTGATAAGGGCA	1
+GGACGGAGAACTGATAATGGC	1
+GGAGATTGTAGAACGAAAGGAAAAT	1
+GGTATCTTTATATTTTAATTTTCTT	1
+NATTCTTACTCCATTTCAATTTACT	1
+TAAAAATAATTGTCTTTAATTTCA	1
+TAAAATAAACCAAAACCCAAAAAT	1
+TAAAATAAATAAGTCCGACGACAA	1
+TAAAATTGTAATATTTAAATAATAT	1
+TAAAGAAGAAGAATTGATTTTAAT	1
+TAACAAATAGAACGTTCTAATTTAAA	1
+TAACATAAATTTTAATCATAAATTG	1
+TAACCTTGCAGAACTATACGATTCAAA	1
+TAACGGAGCACGAGAACGAAGTGG	1
+TAACTACTTTTACATTAATACTAA	1
+TAACTTTAACTTTTTTACT	1
+TAAGAAACTGAGCTAACGCAATGTACC	1
+TAAGTTTTTAATCATTTTTTTT	1
+TAATAAATTATTAAATAAAAAAAAAA	1
+TAATAAGAAACTGTTCAAACAATCCAC	1
+TAATAATTTAAATAAATATAAATTT	1
+TAATCAAAAAACTCTTCATTTTTA	1
+TAATTACCATTGCTAACTATCCA	1
+TACAAAAAATGCGAAAATTGACCCT	1
+TACAAAACAAACAAATTACAATCTAAA	1
+TACAAACGGAACTTTCGTCATAA	1
+TACAAACGGAACTTTCTTCATAACTTC	1
+TACAAACGTAATTTTCGCATAACATC	1
+TACAAATTTTTTTTTCTTTCTTAT	1
+TACAACTAACATCCTTTCTTCTTCC	1
+TACAAGACTAAAACAAACGTGAAGT	1
+TACACACTCATCAACCAAAGGACG	1
+TACACACTCATCAACCAAAGTACGTA	1
+TACACAGACTTACAAAACACATCCTTC	1
+TACACCTCTTTTTACTTTTTTATT	1
+TACAGACAACACATACGGACTTAA	1
+TACAGACAACACATACGGACTTAAGT	1
+TACAGAGAAATATACAACACTCACC	1
+TACCTCTTTATTAACCTCCACCTCTA	1
+TACTATATACTTCTTCAAATCACA	1
+TACTATTTTATTATACATACATACATTA	1
+TACTCAATAGAACTCTACTCACTCATA	1
+TACTGAAAACGGGCGCATATCAGTGG	1
+TACTGAAACAAGGAAACACAAGC	1
+TACTTTTTTCTTAATTTTTTATTAAAC	1
+TAGAACTCGAACCAGAGCTCC	1
+TAGACTTTCTACTCATTATTAC	1
+TAGCCTTTACTAGGCTTTTTCTAA	1
+TAGCGAGATGGACCAACGTGCTGT	1
+TAGGAACTTCATACCGGTCTC	1
+TAGGTACTTACCTTTTTTTTACACAA	1
+TAGTCATACATACCTAATTATACATA	1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA	1
+TAGTTACCTTCATATCTCTCTTTA	1
+TAGTTTCACTACTTTATTCTTTTTA	1
+TAGTTTTAAATATTTCTTTTTTTC	1
+TATAATTTTATTTTATATTTTCTCT	1
+TATATAAATCTTCAACATCAA	1
+TATATTGCCTCCCCATAATCCTT	1
+TATCTATCTTTGATCTTCTTTTCA	1
+TATCTGATCAACAATCTTTTCCCAT	1
+TATGGAGAAACAGCGATATAAGTCA	1
+TATGGATTATTTCAAAATTTTTTTTT	1
+TATTATACATAGAATAACAAATCTTT	1
+TATTATCATCTCGTTCTTCCTTCTC	1
+TATTATTTTTTTATTCCATTCATAT	1
+TATTCAATCACTCCATTATATATAACA	1
+TATTCCGACAATACCTTCTTTAC	1
+TATTTATAAATTTTTTCTTGAGAC	1
+TATTTCACTTTATACTTCCTTAA	1
+TATTTCAGCAACAGACTAAGACTAA	1
+TATTTCCAACCTTCAACCTCAAATAA	1
+TATTTCTTTTTTAACTTCTTTTC	1
+TCAAAAAGTAATAGGGATCGTTA	1
+TCAAAGAACAATGTAAAGCCGCGAC	1
+TCAAATGCAAATTGGATTTATGA	1
+TCAACTGGCAAGAATTTTTGAAAATT	1
+TCAAGCCTTTTGAAGAACTGACCTAAA	1
+TCAGAAGAACAGAGAATTGATTTT	1
+TCAGAGTTCTACAGGTCCTACGATT	1
+TCAGTCTTTTTTTCTCTCCTA	1
+TCATTACACTTCTTACAAAAC	1
+TCCACAACAACTCTATCTAAAGCATT	1
+TCCACCTATTTATCTTTTCTT	1
+TCCCAACCCTCGAGCATCATTTTC	1
+TCCGAAAAATCGTAGGACCCGGGCA	1
+TCCGAAAACAAGGCCCGTCGCT	1
+TCCTGAGGACGAGGGGCGTTTAGC	1
+TCGCCGTAAAGCCAGTCGTTCTCC	1
+TCTAAACACCCACGAAAATCTCTTAC	1
+TCTAGTCTGAGCGTAGTACCAGATTG	1
+TCTATATTATTTTTATCAATTTTCACC	1
+TCTATTCATACAAAACACTAATACCC	1
+TCTATTTCTTTATTTTTTTTATTAT	1
+TCTCATCTTACAATTTTTTAAAACTT	1
+TCTCTTTTATTTTTATCTTTCCTT	1
+TCTGTATTTGACTTATTACTTTCTCC	1
+TCTTATTATCATTTTTTTATCCCTT	1
+TCTTATTTTAATCTTCCAATTTC	1
+TCTTCTATATAATCCTTTATTATAA	1
+TCTTTTTTTTAATACTTATTTTCATT	1
+TGAAACTGAAACTAACATACAAAATATT	1
+TGAAAGGAAAAACAGGACACGGGA	1
+TGACCAAAGACAAACAAACAATAAATA	1
+TGAGAATGACTTCTTCACGATCTCTT	1
+TGAGCGGAGAACCAGAGTTGATGAGC	1
+TGAGCTAGAACTGCACCCACTCCA	1
+TGAGGAAAACAGAAAAATGAGAGACA	1
+TGATGACGGGCAGCAGGGATTTTC	1
+TGATTTACTTACATTCTTTTTTT	1
+TGCTTGGACTACATATGGTTGAGGG	1
+TGCTTGGACTACATATGGTTGAGTG	2
+TGCTTTTACTACATATTTTTTATTTTTTTA	1
+TGGAATGTAAAGAAGTATGGAG	2
+TGGACAAGAACCACGCGACGGGTGT	1
+TGGACGGAGAACTGATAAGGGC	1
+TGGAGTAGCACAGTCGTCTGAAATC	1
+TGTGTAATCTTTCTACTTCTTCTAC	1
+TTAAACAATTTGGAATTAATT	1
+TTAAAGACGCAACAACTAACATT	1
+TTAAGTTTTAGACATAATCTATTACAA	1
+TTAATATGTAATTTCATACCTCAC	1
+TTAATGACACACGGGAAAAACACCG	1
+TTACAATCTACTATTCTTTTATTA	1
+TTACTAGATCCACCCTCATTA	1
+TTAGTATTAATCTTCACTTAA	1
+TTATAATCACGGCACCCTATACA	1
+TTATTACATCGTCCACATATAACAAAA	1
+TTATTATCTATTTTAATTTTTCTTAA	1
+TTCAAGTAGATTGCATTTTTTAATA	1
+TTCATTTTATTTTTAAATATCTTTTTT	1
+TTCTAGCACAACACGCACACATATA	1
+TTCTTCCTTTTATCCTCTCTTAA	1
+TTCTTGATAACGCATCTTCTACAT	1
+TTCTTGGACTACACATTTTTTATTGTTTTA	1
+TTCTTTGACTACATATTTTTTATT	1
+TTCTTTTACTACATATTGTTTATTTTTTTA	1
+TTCTTTTACTACATATTTTTTATTTTT	1
+TTCTTTTACTACATATTTTTTATTTTTTTA	1
+TTGATTCTTCTTTTTCACAAAA	1
+TTGTAAAACATTCTTTCTCCTGAC	1
+TTTAAACACTTCCTACATCAAATTTC	1
+TTTAAGACTTATGAGCTTG	1
+TTTACCAGAGGAGTCGAGTTTTT	1
+TTTATTACAACCCTATCTTACCTCAA	1
+TTTATTACTTAGTCATAATTCCAA	1
+TTTCCGACAAATACACCATCTTC	1
+TTTGATACCTTTATACCATACCTATT	1
+TTTGTTTTTCATTTTTTTATCTTT	1
+TTTGTTTTTTACTATATTT	1
+TTTTATATTTCCTTATATCTTTACTA	1
+TTTTCATTTCTTCTTCAAATCCTTT	1
+TTTTCTTTACCCATCTTTACTTTCCC	1
+TTTTCTTTTTTTCATTTTCTCTTTTA	1
+TTTTTAACTCATTTTACAATTAAAC	1
+TTTTTAACTCCCATCATTTTTCCTC	1
+TTTTTATCAATTTTCACCATTC	1
+TTTTTTATCAATTTTCACCATTCAT	1
+TTTTTTATCATTTTTCACCTAAAAAA	1
+TTTTTTTTTTGTTTTTATTTTTATCAT	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.tab	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,247 @@
+ACAGCAGGACGGTGATCA	2
+TGCTTGGACTACATATGGTTGAGTG	2
+TGGAATGTAAAGAAGTATGGAG	2
+AATTTATTTAATTTATATTCTAACTAA	1
+TTATAATCACGGCACCCTATACA	1
+TCCGAAAAATCGTAGGACCCGGGCA	1
+TCCCAACCCTCGAGCATCATTTTC	1
+TTGATTCTTCTTTTTCACAAAA	1
+AGAACAATTAAATAAAATAGCATA	1
+CCAGAAAACAATACAACATCCTCA	1
+TCTAGTCTGAGCGTAGTACCAGATTG	1
+TTTTTAACTCCCATCATTTTTCCTC	1
+GCAGATAGAAATCAATACAAAAATC	1
+AAGTGAAGAAGTAGTTTTT	1
+AATGTCACTTGAAGAATTCACGT	1
+TTTACCAGAGGAGTCGAGTTTTT	1
+GGTATCTTTATATTTTAATTTTCTT	1
+TTACTAGATCCACCCTCATTA	1
+TATATAAATCTTCAACATCAA	1
+GAAACCATTATCTTATCTTTATACA	1
+GGAGATTGTAGAACGAAAGGAAAAT	1
+ACTAAACTTTTCTTACCATATTTCTA	1
+TCAAGCCTTTTGAAGAACTGACCTAAA	1
+TAACATAAATTTTAATCATAAATTG	1
+TAGCGAGATGGACCAACGTGCTGT	1
+TACAAACGTAATTTTCGCATAACATC	1
+CGGAAAAGAATGTAGACCATTTAA	1
+GAAAGGAAGGGAAGAAAGCGAAAGGA	1
+CAAGAATACAAAAAATACTAATTA	1
+CATACCTACAAAAAAGCTTCTCTTAC	1
+TAAAATAAATAAGTCCGACGACAA	1
+AGGAATATGATGAAATAAAAAAAT	1
+TTTTTTTTTTGTTTTTATTTTTATCAT	1
+TTTTTAACTCATTTTACAATTAAAC	1
+CCGCGATCTGCTTATTTATAATCTT	1
+TAGGTACTTACCTTTTTTTTACACAA	1
+TCATTACACTTCTTACAAAAC	1
+TATTATACATAGAATAACAAATCTTT	1
+ATGTTATTTACTTTTTCCCCTTATA	1
+TCCGAAAACAAGGCCCGTCGCT	1
+TGCTTTTACTACATATTTTTTATTTTTTTA	1
+GAACAATTTTTCAATTTTTTACATTA	1
+AACATTTTATCAATTATACATTA	1
+TATCTGATCAACAATCTTTTCCCAT	1
+AATTGCAACAGAGACTGGAA	1
+TATTCAATCACTCCATTATATATAACA	1
+CAAACGGAACAAGACATCACCATC	1
+TCTTCTATATAATCCTTTATTATAA	1
+TGATGACGGGCAGCAGGGATTTTC	1
+TTTATTACAACCCTATCTTACCTCAA	1
+AACAGGAAAAACAGAAGGATTTCTA	1
+CGATATTTTCTCCTCGTACC	1
+AAATTGCAAAGATGGAAAATAAAACT	1
+TACACACTCATCAACCAAAGGACG	1
+TATATTGCCTCCCCATAATCCTT	1
+TACAAACGGAACTTTCTTCATAACTTC	1
+GGACGGAGAACTGATAATGGC	1
+TTCTTTGACTACATATTTTTTATT	1
+TCTTTTTTTTAATACTTATTTTCATT	1
+TACTTTTTTCTTAATTTTTTATTAAAC	1
+AATATAAAAATACAATCAACCATTGCA	1
+CAGTTTCACAAAAGATCTTTTAA	1
+GAAACAAACAACACATACCCTCTGGC	1
+AATGACACACTCTTCATCAAC	1
+AAGATGGAGTAGTTTTTT	1
+ACAACCTCAACTCATATTT	1
+TTAAACAATTTGGAATTAATT	1
+TACAAAAAATGCGAAAATTGACCCT	1
+GGACGGAGAACTGATAAGGGCA	1
+TTAAGTTTTAGACATAATCTATTACAA	1
+TTATTATCTATTTTAATTTTTCTTAA	1
+CACCGAACCGGGAAGGCGAACAAC	1
+TCCACCTATTTATCTTTTCTT	1
+TCGCCGTAAAGCCAGTCGTTCTCC	1
+TCAAAGAACAATGTAAAGCCGCGAC	1
+TGGACAAGAACCACGCGACGGGTGT	1
+CCCGAAAAGCCGAGGACGACTTA	1
+TTTTTTATCATTTTTCACCTAAAAAA	1
+TAGAACTCGAACCAGAGCTCC	1
+TCTATATTATTTTTATCAATTTTCACC	1
+TCTATTTCTTTATTTTTTTTATTAT	1
+TTTGATACCTTTATACCATACCTATT	1
+ATAAAGCTAGATTACCAAAGCAT	1
+GCCAACGACCATACCACGA	1
+CGGCACATGTTGAATTACACTCA	1
+TACTATTTTATTATACATACATACATTA	1
+TTAATGACACACGGGAAAAACACCG	1
+TAGTTTCACTACTTTATTCTTTTTA	1
+AACAGGGAGATCAACAGCGTTGACA	1
+CGATATTTTCTCCTCTGACC	1
+ACCAGCACCTTCCGACTCAACGTCAAA	1
+AAGGAATTAAAGCAATAATTCTAA	1
+TACAAAACAAACAAATTACAATCTAAA	1
+CAATTTTTAATTCCTTTTTTCTTCTT	1
+TACAGACAACACATACGGACTTAA	1
+TCTGTATTTGACTTATTACTTTCTCC	1
+TGAGCTAGAACTGCACCCACTCCA	1
+CGCCGCAAGATGAATACTCTAATGA	1
+TATTTCTTTTTTAACTTCTTTTC	1
+TTACAATCTACTATTCTTTTATTA	1
+TTTAAACACTTCCTACATCAAATTTC	1
+TGTGTAATCTTTCTACTTCTTCTAC	1
+TCTATTCATACAAAACACTAATACCC	1
+TGGAGTAGCACAGTCGTCTGAAATC	1
+AAGCACGCCTTACCACAATTTATAA	1
+CTGGAAACTATTGATCAAATT	1
+TACACAGACTTACAAAACACATCCTTC	1
+TTCAAGTAGATTGCATTTTTTAATA	1
+TTATTACATCGTCCACATATAACAAAA	1
+CAAGGCTCAGAAGAACATCACCAAGACC	1
+TGAGGAAAACAGAAAAATGAGAGACA	1
+TCAAAAAGTAATAGGGATCGTTA	1
+TAACTTTAACTTTTTTACT	1
+TATTCCGACAATACCTTCTTTAC	1
+TTTGTTTTTTACTATATTT	1
+TTCATTTTATTTTTAAATATCTTTTTT	1
+TACTCAATAGAACTCTACTCACTCATA	1
+TGAAAGGAAAAACAGGACACGGGA	1
+AAAATCGACTGCCGAAAACATTTTAA	1
+TACAGAGAAATATACAACACTCACC	1
+TCAACTGGCAAGAATTTTTGAAAATT	1
+GAGAACTTTTAATCATTTTAC	1
+TATTATCATCTCGTTCTTCCTTCTC	1
+TTTTCATTTCTTCTTCAAATCCTTT	1
+TAGTCATACATACCTAATTATACATA	1
+ATTTTACTTCATCATTTTC	1
+TCTCTTTTATTTTTATCTTTCCTT	1
+GCCGGGGCGTGAGATGTCTGCATTA	1
+AGGATTTTTAAGCCCATATGTTTCC	1
+CAAGATATGAACAAAGCAAAGACAC	1
+CAACACATGACGCGACAATTCTTG	1
+CAAATAACAAACTGAATAAACGAAA	1
+TGAGAATGACTTCTTCACGATCTCTT	1
+TCTTATTATCATTTTTTTATCCCTT	1
+TCAAATGCAAATTGGATTTATGA	1
+CCTTACTCAACATACTTAATCATACTTA	1
+TAGACTTTCTACTCATTATTAC	1
+TGAAACTGAAACTAACATACAAAATATT	1
+AAAACCCGGACAAACCATCGGAGGA	1
+TACAGACAACACATACGGACTTAAGT	1
+TATTTCAGCAACAGACTAAGACTAA	1
+AACTTTAAATTTTTAATAACCTT	1
+TATTTATAAATTTTTTCTTGAGAC	1
+TTAATATGTAATTTCATACCTCAC	1
+CACAGACTGAGGCAGAAAAAACAA	1
+TAAAGAAGAAGAATTGATTTTAAT	1
+TACTGAAAACGGGCGCATATCAGTGG	1
+TCAGTCTTTTTTTCTCTCCTA	1
+TATAATTTTATTTTATATTTTCTCT	1
+NATTCTTACTCCATTTCAATTTACT	1
+TTGTAAAACATTCTTTCTCCTGAC	1
+TAATTACCATTGCTAACTATCCA	1
+TTCTTCCTTTTATCCTCTCTTAA	1
+TCTAAACACCCACGAAAATCTCTTAC	1
+AAAAACACACAGACACAAGCAGCAAT	1
+CGGACGGTATATTTTTTAATATAA	1
+TATGGAGAAACAGCGATATAAGTCA	1
+TACAACTAACATCCTTTCTTCTTCC	1
+AACTCTCTAATTTAACTTTGTGC	1
+TCCTGAGGACGAGGGGCGTTTAGC	1
+TATTTCCAACCTTCAACCTCAAATAA	1
+TGGACGGAGAACTGATAAGGGC	1
+TTTAAGACTTATGAGCTTG	1
+TTAAAGACGCAACAACTAACATT	1
+TAGGAACTTCATACCGGTCTC	1
+CGATATTTTCTCCTCTTACC	1
+GAGGATTAAAAGAACGGTTTATAA	1
+GAATGATCGCACCACCACCTCAACGTT	1
+TTTTCTTTACCCATCTTTACTTTCCC	1
+AAGACAACAATGACATATAAGACG	1
+TAATAATTTAAATAAATATAAATTT	1
+TACTGAAACAAGGAAACACAAGC	1
+TCAGAAGAACAGAGAATTGATTTT	1
+CATACCTTAAATTATCTCTTTCTT	1
+TTCTTTTACTACATATTTTTTATTTTT	1
+AAAAAATATCTTTTTTAACTCGTGGCC	1
+TAACAAATAGAACGTTCTAATTTAAA	1
+TAGTTACCTTCATATCTCTCTTTA	1
+TAAAATTGTAATATTTAAATAATAT	1
+AAAAGGAAAAACAGAAAAATTGGG	1
+AGATGTTGATCTAAACTCTCCCA	1
+TACCTCTTTATTAACCTCCACCTCTA	1
+TTTCCGACAAATACACCATCTTC	1
+ACAAATCATAAATTTTTTTTTACT	1
+GACGAAACGCAACAACAAAATGGACG	1
+TACAAATTTTTTTTTCTTTCTTAT	1
+TACACCTCTTTTTACTTTTTTATT	1
+TATGGATTATTTCAAAATTTTTTTTT	1
+TTCTAGCACAACACGCACACATATA	1
+TAACTACTTTTACATTAATACTAA	1
+TCTCATCTTACAATTTTTTAAAACTT	1
+TTCTTGGACTACACATTTTTTATTGTTTTA	1
+TACACACTCATCAACCAAAGTACGTA	1
+TACTATATACTTCTTCAAATCACA	1
+TCAGAGTTCTACAGGTCCTACGATT	1
+TGATTTACTTACATTCTTTTTTT	1
+CCATATATGACTGACTCATTTCAC	1
+GAAGAGGAGGAGGAGTTTGTAAG	1
+AAAGACAAAAGAAATACAGGCACT	1
+TACAAGACTAAAACAAACGTGAAGT	1
+TAACGGAGCACGAGAACGAAGTGG	1
+CTTCTTTTACTACATATTTTTTATTTTTTTA	1
+TAATAAGAAACTGTTCAAACAATCCAC	1
+TGAGCGGAGAACCAGAGTTGATGAGC	1
+TATTATTTTTTTATTCCATTCATAT	1
+TTTATTACTTAGTCATAATTCCAA	1
+TTTTATATTTCCTTATATCTTTACTA	1
+AACGGGGAATAAGGGTTCG	1
+AATCTACAATTTCCATTACGACTCC	1
+CCGACCGAGCAAATAAACACAGGAACG	1
+TCCACAACAACTCTATCTAAAGCATT	1
+TTCTTGATAACGCATCTTCTACAT	1
+TGCTTGGACTACATATGGTTGAGGG	1
+CAGATTCACTGATTTTCTTACGCC	1
+TTTGTTTTTCATTTTTTTATCTTT	1
+CTATATTTTCTCTCTTACC	1
+TAACCTTGCAGAACTATACGATTCAAA	1
+TAAGAAACTGAGCTAACGCAATGTACC	1
+TTCTTTTACTACATATTTTTTATTTTTTTA	1
+TATCTATCTTTGATCTTCTTTTCA	1
+TAATAAATTATTAAATAAAAAAAAAA	1
+TTTTTTATCAATTTTCACCATTCAT	1
+TATTTCACTTTATACTTCCTTAA	1
+TAGTTTTAAATATTTCTTTTTTTC	1
+TTCTTTTACTACATATTGTTTATTTTTTTA	1
+GAGAATAAATATTTCAATGGTCTATTG	1
+CGATATTTTCTCCTCTTACCT	1
+CACGACTTTATTCTTTTTATCTCA	1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA	1
+CATAATATAAACTTATCTT	1
+ATGAAATTCGAACAATACGTC	1
+AACAACTGCAAACATCTACCACA	1
+TAAAAATAATTGTCTTTAATTTCA	1
+CGCAACCAGCAGCAACTCCTAGCAT	1
+ATTATTAATAAATTATTATAA	1
+CATTAATTCATCCATTTAAACTAA	1
+TCTTATTTTAATCTTCCAATTTC	1
+CTAGACAAGATGCTATAAATTTTAAA	1
+TGACCAAAGACAAACAAACAATAAATA	1
+TTTTTATCAATTTTCACCATTC	1
+TAAGTTTTTAATCATTTTTTTT	1
+TAATCAAAAAACTCTTCATTTTTA	1
+TACAAACGGAACTTTCGTCATAA	1
+TTTTCTTTTTTTCATTTTCTCTTTTA	1
+TAGCCTTTACTAGGCTTTTTCTAA	1
+TTAGTATTAATCTTCACTTAA	1
+TAAAATAAACCAAAACCCAAAAAT	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort.py	Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,8 @@
+import sys
+
+# Print the lines of the file named by sys.argv[1] in sorted order.
+with open(sys.argv[1], 'r') as F:
+    # rstrip, not line[:-1]: an unterminated last line keeps its last char.
+    lines = [line.rstrip('\n') for line in F]
+for line in sorted(lines):
+    print(line)