#!/usr/bin/env python
#
# Provenance (kept from the changeset header this code was extracted from):
#   HG changeset patch
#   User artbio
#   Date 1504523608 14400
#   Node ID a8aacccd79a381f0e32afab3340e96bd29490238
#   planemo upload for repository
#   https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter
#   commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
#   diff -r 000000000000 -r a8aacccd79a3 sequence_format_converter.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/sequence_format_converter.py Mon Sep 04 07:13:28 2017 -0400
#   @@ -0,0 +1,208 @@
#
"""Convert sequence read files between fasta, fastaw and tabular formats.

The input format (fasta, fastaw, tabular or fastq) is detected from the first
four lines of the input file. Reads are collapsed into a
``sequence -> number of reads`` table and written back in the requested
output format, most abundant sequence first.
"""
import argparse
import logging
import sys
from collections import defaultdict


def Parser():
    """Parse the command line and return the argparse namespace.

    The namespace carries ``input``, ``output`` and ``format``.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--input', action="store", type=str,
        help="input file, accepted format: fastq, fasta, fasta_weigthed, "
             "tabular")
    the_parser.add_argument(
        '--output', action="store", type=str, help="output converted file")
    the_parser.add_argument(
        '--format', action="store", type=str,
        # List the values write() really accepts.
        help="select output format (fasta, fastaw, tabular)")
    args = the_parser.parse_args()
    return args


class Sequencing:
    """Read a sequence file, count identical sequences and write them back.

    Instantiating the class performs the whole conversion: the input format
    is detected, the file is read into ``self.seqdic`` (sequence -> count)
    and the table is written to ``output`` in ``format``.

    Attributes:
        input: path of the input file.
        output: file object opened for writing on the ``output`` path.
        outputformat: requested output format.
        inputformat: detected input format ('fasta', 'fastaw', 'tabular' or
            'fastq').
        seqdic: ``defaultdict(int)`` mapping each sequence to its read count.
    """

    # Characters accepted in a sequence during format detection.
    REFERENCE = frozenset('ATGCN')
    # Output formats that write() knows how to produce.
    OUTPUT_FORMATS = ('fasta', 'fastaw', 'tabular')

    def __init__(self, input, output, format):
        """Run the conversion of ``input`` into ``output`` as ``format``.

        Raises:
            SystemExit: if the input format cannot be detected, the input
                cannot be parsed or the output format is not supported.
        """
        self.input = input
        self.output = open(output, 'w')
        self.outputformat = format
        self.seqdic = defaultdict(int)
        try:
            self.inputformat = self.detectformat(self.input)
            self.read(self.input, self.inputformat)
            self.write(self.output, self.outputformat)
        finally:
            # write() closes the handle on success; this also covers the
            # paths where detection or reading exits early.
            self.output.close()

    def _only_nucleotides(self, sequence):
        """Return True if every character of ``sequence`` is A, T, G, C, N."""
        return set(sequence) <= self.REFERENCE

    @staticmethod
    def _no_valid_format():
        """Log and exit with the 'No valid format detected' message."""
        logging.info("No valid format detected")
        sys.exit('No valid format detected')

    def detectformat(self, input):
        """Guess the format of the file at path ``input``.

        Only the first four lines are inspected.

        Returns:
            One of 'fasta', 'fastaw', 'fastq' or 'tabular'.

        Raises:
            SystemExit: if the file has fewer than four lines, contains
                unexpected nucleotides or matches no known format.
        """
        block = []
        with open(input, 'r') as handle:
            for _ in range(4):
                raw = handle.readline()
                if not raw:
                    # readline() returns '' (it does not raise) at end of
                    # file; a blank line would be '\n' and is kept.
                    logging.info("File hasn't at leat four lines !")
                    sys.exit("File hasn't at leat four lines !")
                # Strip only the line terminator so that a last line without
                # trailing newline keeps its final character.
                block.append(raw.rstrip('\r\n'))
        line1, line2, line3, line4 = block

        if line1.startswith('>'):
            # Fasta-like input: work out which of the four lines hold
            # sequence data, depending on where the second header is.
            if line3.startswith('>'):
                logging.info("'>' detected in lines 1 and 3")
                sequence_lines = [line2, line4]
            elif line4.startswith('>'):
                logging.info("'>' detected in lines 1 and 4")
                sequence_lines = [line2, line3]
            else:
                logging.info("'>' detected in lines 1")
                sequence_lines = [line2, line3, line4]
            if not self._only_nucleotides(''.join(sequence_lines).upper()):
                logging.info("But other nucleotides that A, T, G, C or N")
                sys.exit('input appears to be Fasta but with '
                         'unexpected nucleotides')
            # fastaw headers end with '_<integer>'; a single header that
            # does not makes the file plain fasta.
            try:
                for line in block:
                    if line.startswith('>'):
                        int(line.split('_')[-1])
            except ValueError:
                return 'fasta'
            return 'fastaw'

        if line1.startswith('@') and line3.startswith('+'):
            if not self._only_nucleotides(line2):
                logging.info("Looks like fastq input but other nucleotides "
                             "that A, T, G, C or N")
                sys.exit("input appears to be Fastq "
                         "but with unexpected nucleotides")
            return 'fastq'

        # Last candidate: tabular, i.e. '<sequence>\t<integer>' on each line.
        for line in block:
            fields = line.split('\t')
            if len(fields) != 2:
                self._no_valid_format()
            try:
                int(fields[-1])
            except ValueError:
                self._no_valid_format()
            if not self._only_nucleotides(fields[0]):
                self._no_valid_format()
        return 'tabular'

    def read(self, input, format):
        """Load the file at path ``input`` into ``self.seqdic``.

        Args:
            input: path of the file to read.
            format: 'fasta', 'fastaw', 'tabular' or 'fastq'.

        Raises:
            SystemExit: if ``format`` is unknown or the file content cannot
                be parsed (so that truncated results are never written).
        """
        readers = {
            'fasta': self.readfasta,
            'fastaw': self.readfastaw,
            'tabular': self.readtabular,
            'fastq': self.readfastq,
        }
        reader = readers.get(format)
        if reader is None:
            logging.info("no valid format detected")
            sys.exit('No valid format detected')
        handle = open(input, 'r')
        try:
            reader(handle)
        except (ValueError, IndexError) as err:
            logging.info("an error occured while reading %s", format)
            sys.exit("an error occured while reading %s: %s" % (format, err))
        finally:
            handle.close()

    def readfastaw(self, input):
        """Read an open fastaw file; headers end with '_<number of reads>'.

        Raises:
            ValueError: if a header has no integer count or a sequence line
                comes before the first header.
        """
        weigth = None
        for line in input:
            line = line.rstrip('\r\n')
            if not line:
                continue  # ignore blank lines
            if line.startswith('>'):
                weigth = int(line.split('_')[-1])
            elif weigth is None:
                raise ValueError("sequence line found before any header")
            else:
                self.seqdic[line] += weigth
        input.close()

    def readfasta(self, input):
        """Read an open fasta file; sequences may span several lines."""
        stringlist = None  # lines of the record being read, None before it
        for line in input:
            if line.startswith('>'):
                if stringlist is not None:
                    # dump the sequence of the previous record
                    self.seqdic["".join(stringlist)] += 1
                stringlist = []
            elif stringlist is None:
                # if file went through filter and contains only empty lines
                logging.info("first line is empty.")
            else:
                stringlist.append(line.rstrip('\r\n'))
        if stringlist is None:
            logging.info("input file has not fasta sequences.")
        else:
            self.seqdic["".join(stringlist)] += 1  # for the last sequence
        input.close()

    def readtabular(self, input):
        """Read an open tabular file of '<sequence>\\t<count>' lines.

        Raises:
            IndexError: if a non-blank line has no second column.
            ValueError: if the count is not an integer.
        """
        for line in input:
            line = line.rstrip('\r\n')
            if not line:
                continue  # ignore blank lines
            fields = line.split('\t')
            self.seqdic[fields[0]] += int(fields[1])
        input.close()

    def readfastq(self, input):
        """Read an open fastq file; the 2nd line of each 4 is the sequence."""
        for linecount, line in enumerate(input, 1):
            if linecount % 4 == 2:
                self.seqdic[line.rstrip('\r\n')] += 1
        input.close()

    def write(self, output, format='fasta'):
        """Write ``self.seqdic`` to the open file ``output`` and close it.

        Sequences are written by decreasing read count.

        Args:
            output: file object opened for writing; always closed on return.
            format: 'fasta' (one record per read), 'fastaw' (one record per
                sequence, header '>rank_count') or 'tabular'.

        Raises:
            SystemExit: if ``format`` is not a supported output format.
        """
        try:
            if format not in self.OUTPUT_FORMATS:
                logging.info("Unsupported output format: %s", format)
                sys.exit("Unsupported output format: %s" % format)
            ranked = sorted(self.seqdic, key=self.seqdic.get, reverse=True)
            if format == 'fasta':
                headercount = 0
                for seq in ranked:
                    for _ in range(self.seqdic[seq]):
                        headercount += 1
                        output.write('>%s\n%s\n' % (headercount, seq))
            elif format == 'fastaw':
                for headercount, seq in enumerate(ranked, 1):
                    output.write('>%s_%s\n%s\n' % (headercount,
                                                   self.seqdic[seq], seq))
            else:
                for seq in ranked:
                    output.write('%s\t%s\n' % (seq, self.seqdic[seq]))
        finally:
            output.close()


def main(input, output, format):
    """Convert the file ``input`` into ``output`` using ``format``."""
    Sequencing(input, output, format)


if __name__ == "__main__":
    args = Parser()
    log = logging.getLogger(__name__)
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    main(args.input, args.output, args.format)

# ---------------------------------------------------------------------------
# Remainder of the changeset that shared the replaced lines, kept verbatim:
#
#   diff -r 000000000000 -r a8aacccd79a3 sequence_format_converter.xml
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/sequence_format_converter.xml Mon Sep 04 07:13:28 2017 -0400
#   @@ -0,0 +1,169 @@
#   (in this copy the hunk shows only empty '+' lines before the help text)
#
#   **What it does**
#
#   The tool performs all pairwise conversions between sequence formats
#   fasta, fastaw and tabular.
#
#   The tool is also able to convert fastq format in any of the formats
#   fasta, fastaw and tabular.
#
#   The format of the input is automatically detected by the tool.
#
#   **Formats**
#
#   *Fasta*
#
#   >id1
#
#   ATGCATGACCAGATAGGAC
#
#   >id2
#
#   ATGCATGACCAGATAGGAC
#
#   Note that the tool handles fasta sequences over multiple lines
#
#   ----------
#
#   *Fastaw*
#
#   Allows to reduce the size of a fasta file of sequence reads:
#
#   >id1_n1
#
#   ATGCATGACCAGATAGGAC
#
#   >id2_n2
#
#   ATGCATGACCAGATAGGAC
#
#   etc...
#
#   Here n1 and n2 are integers that indicate the number of reads of the
#   sequence found in the sequencing dataset
#
#   Note that if 2 fastaw files are merged (e.g. by concatenation), the
#   values of the number of reads are wrong.
#
#   These values can simply be re-computed by submitting the merged file to
#   a fastaw conversion with the *sequence_format_converter* tool !
# ---------------------------------------------------------------------------
+ + +---------- + +*Tabular* + +Is a tabular version of fastaw without fasta headers: + +column 1 column2 + +ATGCATGACCAGATAGGAC n1 + +ATGCATGACCAGATAGGAC n2 + + +---------- + +*Fastq* + +@HWI-1 + +ATGCATGACCAGATAGGAC + +\+ + +BBBA;ACB9ABCBABB@@/ + +@HWI-2 + +ATGCATGACCAGATAGGAC + +\+ + +?03@?82?B>C@B>@CC?0 + + + + + diff -r 000000000000 -r a8aacccd79a3 test-data/fastqTofasta.sorted.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastqTofasta.sorted.fa Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,500 @@ +>1 +>10 +>100 +>101 +>102 +>103 +>104 +>105 +>106 +>107 +>108 +>109 +>11 +>110 +>111 +>112 +>113 +>114 +>115 +>116 +>117 +>118 +>119 +>12 +>120 +>121 +>122 +>123 +>124 +>125 +>126 +>127 +>128 +>129 +>13 +>130 +>131 +>132 +>133 +>134 +>135 +>136 +>137 +>138 +>139 +>14 +>140 +>141 +>142 +>143 +>144 +>145 +>146 +>147 +>148 +>149 +>15 +>150 +>151 +>152 +>153 +>154 +>155 +>156 +>157 +>158 +>159 +>16 +>160 +>161 +>162 +>163 +>164 +>165 +>166 +>167 +>168 +>169 +>17 +>170 +>171 +>172 +>173 +>174 +>175 +>176 +>177 +>178 +>179 +>18 +>180 +>181 +>182 +>183 +>184 +>185 +>186 +>187 +>188 +>189 +>19 +>190 +>191 +>192 +>193 +>194 +>195 +>196 +>197 +>198 +>199 +>2 +>20 +>200 +>201 +>202 +>203 +>204 +>205 +>206 +>207 +>208 +>209 +>21 +>210 +>211 +>212 +>213 +>214 +>215 +>216 +>217 +>218 +>219 +>22 +>220 +>221 +>222 +>223 +>224 +>225 +>226 +>227 +>228 +>229 +>23 +>230 +>231 +>232 +>233 +>234 +>235 +>236 +>237 +>238 +>239 +>24 +>240 +>241 +>242 +>243 +>244 +>245 +>246 +>247 +>248 +>249 +>25 +>250 +>26 +>27 +>28 +>29 +>3 +>30 +>31 +>32 +>33 +>34 +>35 +>36 +>37 +>38 +>39 +>4 +>40 +>41 +>42 +>43 +>44 +>45 +>46 +>47 +>48 +>49 +>5 +>50 +>51 +>52 +>53 +>54 +>55 +>56 +>57 +>58 +>59 +>6 +>60 +>61 +>62 +>63 +>64 +>65 +>66 +>67 +>68 +>69 +>7 +>70 +>71 +>72 +>73 +>74 +>75 +>76 +>77 +>78 +>79 +>8 +>80 +>81 +>82 +>83 +>84 +>85 +>86 +>87 +>88 +>89 +>9 +>90 +>91 +>92 +>93 +>94 +>95 +>96 +>97 +>98 +>99 +AATGGCACTGGAAGAATTCACGG +AATGGCACTGGAAGAATTCACGG +AATGGCACTGGAAGAATTCACGGG 
+AATGGCACTGGAAGAATTCACGGG +AATGGCACTGGAAGAATTCACGGG +AATGGCACTGGAAGAATTCACGGG +AATGGCACTGGAAGAATTCACGGGT +AATGGCACTGGAAGAATTCACGTG +AATTGCACTAGTCCCGGCCTG +ACTGAATTCTCGTGGGTCTGCAT +AGGACGGGAAGGTGTCAACG +ATAAAGCTAGATTACCAAAGCAT +CAAATTCGGTTCTAGAGAGGTT +CGAATAGCGTTGTGACTGA +CGGACGGTATATGGGTTAATATT +CGGATGATGGTTCACAACGACC +CGGCACATGTTGAAGTACACTCA +CGGCACATGTTGAAGTACACTCA +CGGCACATGTTGAAGTACACTCAA +CGGCACATGTTGAAGTACACTCAA +CTGACTAGATCCACACTCATTA +GGACGGAGAACTGATAAGGGCTCGG +GGCGAACATGGATCTAGTGCACG +GGGAGCGAGACGGGGACTCAC +GGGAGCGAGACGGGGACTCACT +GGGAGCGAGACGGGGACTCACT +TAAAGCTAGATTACCAAAGCAT +TAAAGCTAGATTACCAAAGCAT +TAAAGCTAGATTACCAAAGCAT +TAAGGAAATAGTAGCCGTGAT +TAAGGAAATAGTAGCCGTGAT +TAAGGAAATAGTAGCCGTGAT +TAGCACCACATGATTCGGCT +TAGGAACTTCATACCGTGCTCT +TAGGAACTTCATACCGTGCTCT +TATCACAGCCAGCTTTGAGGAG +TATCACAGCCATTTTGACGAGTT +TATCACAGCCATTTTGACGAGTT +TATTGCACTTGAGACGGCCTTA +TCAGGTACCTGAAGTAGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCGTTAT +TCTTTGGTATTCTAGCTGTAGA +TGACTAGATCCACACTCATTA +TGACTAGATCCACACTCATTAA +TGACTAGATCCACACTCATTAA +TGACTAGATCCACACTCATTAC +TGACTAGATTCACACTCATTA +TGGAATGTAAAGAAGAATGGAG +TGGAATGTAAAGAAGTATGG +TGGAATGTAAAGAAGTATGG +TGGAATGTAAAGAAGTATGGA +TGGAATGTAAAGAAGTATGGA +TGGAATGTAAAGAAGTATGGA +TGGAATGTAAAGAAGTATGGA +TGGAATGTAAAGAAGTATGGA +TGGAATGTAAAGAAGTATGGA +TGGAATGTAAAGAAGTATGGA +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG 
+TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAGA +TGGAATGTAAAGAAGTATGGAGA +TGGAATGTAAAGAAGTATGGAGT +TGGAATGTAAAGAATTATGGAG +TGGAATGTAAAGGAGTATGGAG +TGGACGGAGAACTGATAAGG +TGGACGGAGAACTGATAAGG +TGGACGGAGAACTGATAAGGG +TGGACGGAGAACTGATAAGGG +TGGACGGAGAACTGATAAGGG +TGGACGGAGAACTGATAAGGG +TGGACGGAGAACTGATAAGGG +TGGACGGAGAACTGATAAGGG +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC 
+TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAAA +TGGACGGAGAACTGATAAGGGCAT +TGGACGGAGAACTGATAAGGGCT +TGGACGGAGAACTGATAAGGGCTT +TGGACGGAGAACTGATAAGGGT +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAAA +TGTGATGTGACGTAGTGGAAA +TGTGATGTGACGTAGTGGAAC diff -r 000000000000 -r 
a8aacccd79a3 test-data/fastqTofastaw.sorted.faw --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastqTofastaw.sorted.faw Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,110 @@ +>10_3 +>11_3 +>12_2 +>13_2 +>14_2 +>15_2 +>16_2 +>17_2 +>18_2 +>19_2 +>1_86 +>20_2 +>21_2 +>22_2 +>23_1 +>24_1 +>25_1 +>26_1 +>27_1 +>28_1 +>29_1 +>2_43 +>30_1 +>31_1 +>32_1 +>33_1 +>34_1 +>35_1 +>36_1 +>37_1 +>38_1 +>39_1 +>3_16 +>40_1 +>41_1 +>42_1 +>43_1 +>44_1 +>45_1 +>46_1 +>47_1 +>48_1 +>49_1 +>4_10 +>50_1 +>51_1 +>52_1 +>53_1 +>54_1 +>55_1 +>5_9 +>6_8 +>7_7 +>8_6 +>9_4 +AATGGCACTGGAAGAATTCACGG +AATGGCACTGGAAGAATTCACGGG +AATGGCACTGGAAGAATTCACGGGT +AATGGCACTGGAAGAATTCACGTG +AATTGCACTAGTCCCGGCCTG +ACTGAATTCTCGTGGGTCTGCAT +AGGACGGGAAGGTGTCAACG +ATAAAGCTAGATTACCAAAGCAT +CAAATTCGGTTCTAGAGAGGTT +CGAATAGCGTTGTGACTGA +CGGACGGTATATGGGTTAATATT +CGGATGATGGTTCACAACGACC +CGGCACATGTTGAAGTACACTCA +CGGCACATGTTGAAGTACACTCAA +CTGACTAGATCCACACTCATTA +GGACGGAGAACTGATAAGGGCTCGG +GGCGAACATGGATCTAGTGCACG +GGGAGCGAGACGGGGACTCAC +GGGAGCGAGACGGGGACTCACT +TAAAGCTAGATTACCAAAGCAT +TAAGGAAATAGTAGCCGTGAT +TAGCACCACATGATTCGGCT +TAGGAACTTCATACCGTGCTCT +TATCACAGCCAGCTTTGAGGAG +TATCACAGCCATTTTGACGAGTT +TATTGCACTTGAGACGGCCTTA +TCAGGTACCTGAAGTAGCG +TCAGGTACCTGAAGTAGCGCGCG +TCAGGTACCTGAAGTAGCGCGCGTTAT +TCTTTGGTATTCTAGCTGTAGA +TGACTAGATCCACACTCATTA +TGACTAGATCCACACTCATTAA +TGACTAGATCCACACTCATTAC +TGACTAGATTCACACTCATTA +TGGAATGTAAAGAAGAATGGAG +TGGAATGTAAAGAAGTATGG +TGGAATGTAAAGAAGTATGGA +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAGA +TGGAATGTAAAGAAGTATGGAGT +TGGAATGTAAAGAATTATGGAG +TGGAATGTAAAGGAGTATGGAG +TGGACGGAGAACTGATAAGG +TGGACGGAGAACTGATAAGGG +TGGACGGAGAACTGATAAGGGC +TGGACGGAGAACTGATAAGGGCA +TGGACGGAGAACTGATAAGGGCAA +TGGACGGAGAACTGATAAGGGCAAA +TGGACGGAGAACTGATAAGGGCAT +TGGACGGAGAACTGATAAGGGCT +TGGACGGAGAACTGATAAGGGCTT +TGGACGGAGAACTGATAAGGGT +TGTGATGTGACGTAGTGGAA +TGTGATGTGACGTAGTGGAAA +TGTGATGTGACGTAGTGGAAC diff -r 000000000000 -r a8aacccd79a3 test-data/fastqTotabular.sorted.tab --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/test-data/fastqTotabular.sorted.tab Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,55 @@ +AATGGCACTGGAAGAATTCACGG 2 +AATGGCACTGGAAGAATTCACGGG 4 +AATGGCACTGGAAGAATTCACGGGT 1 +AATGGCACTGGAAGAATTCACGTG 1 +AATTGCACTAGTCCCGGCCTG 1 +ACTGAATTCTCGTGGGTCTGCAT 1 +AGGACGGGAAGGTGTCAACG 1 +ATAAAGCTAGATTACCAAAGCAT 1 +CAAATTCGGTTCTAGAGAGGTT 1 +CGAATAGCGTTGTGACTGA 1 +CGGACGGTATATGGGTTAATATT 1 +CGGATGATGGTTCACAACGACC 1 +CGGCACATGTTGAAGTACACTCA 2 +CGGCACATGTTGAAGTACACTCAA 2 +CTGACTAGATCCACACTCATTA 1 +GGACGGAGAACTGATAAGGGCTCGG 1 +GGCGAACATGGATCTAGTGCACG 1 +GGGAGCGAGACGGGGACTCAC 1 +GGGAGCGAGACGGGGACTCACT 2 +TAAAGCTAGATTACCAAAGCAT 3 +TAAGGAAATAGTAGCCGTGAT 3 +TAGCACCACATGATTCGGCT 1 +TAGGAACTTCATACCGTGCTCT 2 +TATCACAGCCAGCTTTGAGGAG 1 +TATCACAGCCATTTTGACGAGTT 2 +TATTGCACTTGAGACGGCCTTA 1 +TCAGGTACCTGAAGTAGCG 1 +TCAGGTACCTGAAGTAGCGCGCG 10 +TCAGGTACCTGAAGTAGCGCGCGTTAT 1 +TCTTTGGTATTCTAGCTGTAGA 1 +TGACTAGATCCACACTCATTA 1 +TGACTAGATCCACACTCATTAA 2 +TGACTAGATCCACACTCATTAC 1 +TGACTAGATTCACACTCATTA 1 +TGGAATGTAAAGAAGAATGGAG 1 +TGGAATGTAAAGAAGTATGG 2 +TGGAATGTAAAGAAGTATGGA 7 +TGGAATGTAAAGAAGTATGGAG 43 +TGGAATGTAAAGAAGTATGGAGA 2 +TGGAATGTAAAGAAGTATGGAGT 1 +TGGAATGTAAAGAATTATGGAG 1 +TGGAATGTAAAGGAGTATGGAG 1 +TGGACGGAGAACTGATAAGG 2 +TGGACGGAGAACTGATAAGGG 6 +TGGACGGAGAACTGATAAGGGC 86 +TGGACGGAGAACTGATAAGGGCA 8 +TGGACGGAGAACTGATAAGGGCAA 9 +TGGACGGAGAACTGATAAGGGCAAA 1 +TGGACGGAGAACTGATAAGGGCAT 1 +TGGACGGAGAACTGATAAGGGCT 1 +TGGACGGAGAACTGATAAGGGCTT 1 +TGGACGGAGAACTGATAAGGGT 1 +TGTGATGTGACGTAGTGGAA 16 +TGTGATGTGACGTAGTGGAAA 2 +TGTGATGTGACGTAGTGGAAC 1 diff -r 000000000000 -r a8aacccd79a3 test-data/input.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fa Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,500 @@ +>1 +TAGTTACCTTCATATCTCTCTTTA +>2 +TCTATTCATACAAAACACTAATACCC +>3 +ACAACCTCAACTCATATTT +>4 +TATAATTTTATTTTATATTTTCTCT +>5 +TCTTCTATATAATCCTTTATTATAA +>6 +TAAAATAAACCAAAACCCAAAAAT +>7 +AATCTACAATTTCCATTACGACTCC +>8 +TTTCCGACAAATACACCATCTTC +>9 
+CAGATTCACTGATTTTCTTACGCC +>10 +CAAGAATACAAAAAATACTAATTA +>11 +AACTCTCTAATTTAACTTTGTGC +>12 +AAAAACACACAGACACAAGCAGCAAT +>13 +TTACTAGATCCACCCTCATTA +>14 +ATGTTATTTACTTTTTCCCCTTATA +>15 +CGATATTTTCTCCTCTTACC +>16 +TACAGAGAAATATACAACACTCACC +>17 +ATGAAATTCGAACAATACGTC +>18 +GAGAATAAATATTTCAATGGTCTATTG +>19 +TGCTTTTACTACATATTTTTTATTTTTTTA +>20 +CATACCTTAAATTATCTCTTTCTT +>21 +TTTGTTTTTCATTTTTTTATCTTT +>22 +TTATTATCTATTTTAATTTTTCTTAA +>23 +TATTATCATCTCGTTCTTCCTTCTC +>24 +TCAACTGGCAAGAATTTTTGAAAATT +>25 +TACAAATTTTTTTTTCTTTCTTAT +>26 +TTTTCTTTTTTTCATTTTCTCTTTTA +>27 +AAGATGGAGTAGTTTTTT +>28 +TCTCATCTTACAATTTTTTAAAACTT +>29 +CATACCTACAAAAAAGCTTCTCTTAC +>30 +TTTTATATTTCCTTATATCTTTACTA +>31 +GGAGATTGTAGAACGAAAGGAAAAT +>32 +TCTATTTCTTTATTTTTTTTATTAT +>33 +CGGACGGTATATTTTTTAATATAA +>34 +TTCTTGGACTACACATTTTTTATTGTTTTA +>35 +TACTATATACTTCTTCAAATCACA +>36 +ATTTTACTTCATCATTTTC +>37 +TATTTCCAACCTTCAACCTCAAATAA +>38 +CACGACTTTATTCTTTTTATCTCA +>39 +TTCTTTTACTACATATTTTTTATTTTTTTA +>40 +TTTATTACAACCCTATCTTACCTCAA +>41 +CGATATTTTCTCCTCGTACC +>42 +TAATTACCATTGCTAACTATCCA +>43 +CATTAATTCATCCATTTAAACTAA +>44 +GAAACAAACAACACATACCCTCTGGC +>45 +TACTTTTTTCTTAATTTTTTATTAAAC +>46 +TAACTTTAACTTTTTTACT +>47 +TTCTTTTACTACATATTGTTTATTTTTTTA +>48 +TCTAGTCTGAGCGTAGTACCAGATTG +>49 +TTTTTTATCAATTTTCACCATTCAT +>50 +AATGACACACTCTTCATCAAC +>51 +TAACATAAATTTTAATCATAAATTG +>52 +TCTATATTATTTTTATCAATTTTCACC +>53 +TCCCAACCCTCGAGCATCATTTTC +>54 +TAGTCATACATACCTAATTATACATA +>55 +TACAAAAAATGCGAAAATTGACCCT +>56 +GAGAACTTTTAATCATTTTAC +>57 +TCTTATTTTAATCTTCCAATTTC +>58 +CGGCACATGTTGAATTACACTCA +>59 +CAGTTTCACAAAAGATCTTTTAA +>60 +GCCAACGACCATACCACGA +>61 +CAAATAACAAACTGAATAAACGAAA +>62 +TAGTTTCACTACTTTATTCTTTTTA +>63 +TGAGGAAAACAGAAAAATGAGAGACA +>64 +TATATAAATCTTCAACATCAA +>65 +TGATTTACTTACATTCTTTTTTT +>66 +CTTCTTTTACTACATATTTTTTATTTTTTTA +>67 +TACTGAAAACGGGCGCATATCAGTGG +>68 +TATTCAATCACTCCATTATATATAACA +>69 +TATATTGCCTCCCCATAATCCTT +>70 +TCGCCGTAAAGCCAGTCGTTCTCC +>71 +TTTAAACACTTCCTACATCAAATTTC +>72 
+TTTGTTTTTTACTATATTT +>73 +TCTTTTTTTTAATACTTATTTTCATT +>74 +TAAGTTTTTAATCATTTTTTTT +>75 +TGAGAATGACTTCTTCACGATCTCTT +>76 +AATTTATTTAATTTATATTCTAACTAA +>77 +AAAAAATATCTTTTTTAACTCGTGGCC +>78 +AAGACAACAATGACATATAAGACG +>79 +TGAGCTAGAACTGCACCCACTCCA +>80 +ACTAAACTTTTCTTACCATATTTCTA +>81 +TATTTCACTTTATACTTCCTTAA +>82 +TATCTATCTTTGATCTTCTTTTCA +>83 +TTTTTATCAATTTTCACCATTC +>84 +TAAAAATAATTGTCTTTAATTTCA +>85 +TTAAAGACGCAACAACTAACATT +>86 +TAACCTTGCAGAACTATACGATTCAAA +>87 +TACTATTTTATTATACATACATACATTA +>88 +TAGTGGACTTTAAAAAAAAAAAAAAAAAA +>89 +AATGTCACTTGAAGAATTCACGT +>90 +TAATAAGAAACTGTTCAAACAATCCAC +>91 +AAGCACGCCTTACCACAATTTATAA +>92 +CCATATATGACTGACTCATTTCAC +>93 +TTAATATGTAATTTCATACCTCAC +>94 +AGGATTTTTAAGCCCATATGTTTCC +>95 +ACAGCAGGACGGTGATCA +>96 +TGATGACGGGCAGCAGGGATTTTC +>97 +TTGTAAAACATTCTTTCTCCTGAC +>98 +TTCTTCCTTTTATCCTCTCTTAA +>99 +ATTATTAATAAATTATTATAA +>100 +CTGGAAACTATTGATCAAATT +>101 +TACAACTAACATCCTTTCTTCTTCC +>102 +TCAAATGCAAATTGGATTTATGA +>103 +TCCTGAGGACGAGGGGCGTTTAGC +>104 +TACACAGACTTACAAAACACATCCTTC +>105 +GAAAGGAAGGGAAGAAAGCGAAAGGA +>106 +TTAAACAATTTGGAATTAATT +>107 +TAGCCTTTACTAGGCTTTTTCTAA +>108 +CGATATTTTCTCCTCTTACCT +>109 +TGGAATGTAAAGAAGTATGGAG +>110 +TTTTTAACTCCCATCATTTTTCCTC +>111 +TTTTTTATCATTTTTCACCTAAAAAA +>112 +GAACAATTTTTCAATTTTTTACATTA +>113 +TATGGATTATTTCAAAATTTTTTTTT +>114 +TAACGGAGCACGAGAACGAAGTGG +>115 +CAATTTTTAATTCCTTTTTTCTTCTT +>116 +AGATGTTGATCTAAACTCTCCCA +>117 +TGAGCGGAGAACCAGAGTTGATGAGC +>118 +TAACAAATAGAACGTTCTAATTTAAA +>119 +CTAGACAAGATGCTATAAATTTTAAA +>120 +TTTTCTTTACCCATCTTTACTTTCCC +>121 +TACACACTCATCAACCAAAGGACG +>122 +TCTTATTATCATTTTTTTATCCCTT +>123 +TCAGAGTTCTACAGGTCCTACGATT +>124 +TTTATTACTTAGTCATAATTCCAA +>125 +GCCGGGGCGTGAGATGTCTGCATTA +>126 +GACGAAACGCAACAACAAAATGGACG +>127 +TAGACTTTCTACTCATTATTAC +>128 +AAATTGCAAAGATGGAAAATAAAACT +>129 +CCTTACTCAACATACTTAATCATACTTA +>130 +TGCTTGGACTACATATGGTTGAGTG +>131 +GAATGATCGCACCACCACCTCAACGTT +>132 +TCCACCTATTTATCTTTTCTT +>133 +TGGACAAGAACCACGCGACGGGTGT +>134 
+CAAGATATGAACAAAGCAAAGACAC +>135 +CAAACGGAACAAGACATCACCATC +>136 +NATTCTTACTCCATTTCAATTTACT +>137 +TAGAACTCGAACCAGAGCTCC +>138 +CGGAAAAGAATGTAGACCATTTAA +>139 +TACAAACGGAACTTTCGTCATAA +>140 +GGTATCTTTATATTTTAATTTTCTT +>141 +TATTCCGACAATACCTTCTTTAC +>142 +AACTTTAAATTTTTAATAACCTT +>143 +CATAATATAAACTTATCTT +>144 +TATTTATAAATTTTTTCTTGAGAC +>145 +TTTTTTTTTTGTTTTTATTTTTATCAT +>146 +TATTATACATAGAATAACAAATCTTT +>147 +TGGAGTAGCACAGTCGTCTGAAATC +>148 +TATTTCTTTTTTAACTTCTTTTC +>149 +TTATAATCACGGCACCCTATACA +>150 +TTCTTTTACTACATATTTTTTATTTTT +>151 +TAGCGAGATGGACCAACGTGCTGT +>152 +CCAGAAAACAATACAACATCCTCA +>153 +TCCGAAAACAAGGCCCGTCGCT +>154 +TACTCAATAGAACTCTACTCACTCATA +>155 +AACGGGGAATAAGGGTTCG +>156 +TCAGTCTTTTTTTCTCTCCTA +>157 +AATATAAAAATACAATCAACCATTGCA +>158 +GGACGGAGAACTGATAAGGGCA +>159 +TAAAGAAGAAGAATTGATTTTAAT +>160 +TCATTACACTTCTTACAAAAC +>161 +CCGCGATCTGCTTATTTATAATCTT +>162 +TCTAAACACCCACGAAAATCTCTTAC +>163 +AACAGGAAAAACAGAAGGATTTCTA +>164 +TCTCTTTTATTTTTATCTTTCCTT +>165 +AACATTTTATCAATTATACATTA +>166 +GCAGATAGAAATCAATACAAAAATC +>167 +TTAATGACACACGGGAAAAACACCG +>168 +TACAGACAACACATACGGACTTAAGT +>169 +TCCACAACAACTCTATCTAAAGCATT +>170 +ATAAAGCTAGATTACCAAAGCAT +>171 +TACCTCTTTATTAACCTCCACCTCTA +>172 +TACACCTCTTTTTACTTTTTTATT +>173 +CACCGAACCGGGAAGGCGAACAAC +>174 +TAGGTACTTACCTTTTTTTTACACAA +>175 +AGGAATATGATGAAATAAAAAAAT +>176 +TATTATTTTTTTATTCCATTCATAT +>177 +TAAAATAAATAAGTCCGACGACAA +>178 +TCTGTATTTGACTTATTACTTTCTCC +>179 +AAGGAATTAAAGCAATAATTCTAA +>180 +TTCATTTTATTTTTAAATATCTTTTTT +>181 +TTAGTATTAATCTTCACTTAA +>182 +TATGGAGAAACAGCGATATAAGTCA +>183 +CCCGAAAAGCCGAGGACGACTTA +>184 +CACAGACTGAGGCAGAAAAAACAA +>185 +TCAAGCCTTTTGAAGAACTGACCTAAA +>186 +TAAGAAACTGAGCTAACGCAATGTACC +>187 +CGATATTTTCTCCTCTGACC +>188 +TAACTACTTTTACATTAATACTAA +>189 +ACCAGCACCTTCCGACTCAACGTCAAA +>190 +TCAAAGAACAATGTAAAGCCGCGAC +>191 +TTGATTCTTCTTTTTCACAAAA +>192 +TACAAAACAAACAAATTACAATCTAAA +>193 +GAAACCATTATCTTATCTTTATACA +>194 +CTATATTTTCTCTCTTACC +>195 +TCAGAAGAACAGAGAATTGATTTT +>196 
+TAATAAATTATTAAATAAAAAAAAAA +>197 +CCGACCGAGCAAATAAACACAGGAACG +>198 +AAGTGAAGAAGTAGTTTTT +>199 +TGGACGGAGAACTGATAAGGGC +>200 +TGCTTGGACTACATATGGTTGAGGG +>201 +TACAAGACTAAAACAAACGTGAAGT +>202 +TGAAACTGAAACTAACATACAAAATATT +>203 +TATCTGATCAACAATCTTTTCCCAT +>204 +TTTAAGACTTATGAGCTTG +>205 +CAAGGCTCAGAAGAACATCACCAAGACC +>206 +TTCAAGTAGATTGCATTTTTTAATA +>207 +CGCAACCAGCAGCAACTCCTAGCAT +>208 +TACAAACGGAACTTTCTTCATAACTTC +>209 +ACAAATCATAAATTTTTTTTTACT +>210 +TCCGAAAAATCGTAGGACCCGGGCA +>211 +CGCCGCAAGATGAATACTCTAATGA +>212 +TGACCAAAGACAAACAAACAATAAATA +>213 +AAAAGGAAAAACAGAAAAATTGGG +>214 +TTCTTGATAACGCATCTTCTACAT +>215 +TACTGAAACAAGGAAACACAAGC +>216 +TCAAAAAGTAATAGGGATCGTTA +>217 +TGGAATGTAAAGAAGTATGGAG +>218 +TAAAATTGTAATATTTAAATAATAT +>219 +GAGGATTAAAAGAACGGTTTATAA +>220 +TTCTTTGACTACATATTTTTTATT +>221 +TACAAACGTAATTTTCGCATAACATC +>222 +AGAACAATTAAATAAAATAGCATA +>223 +TAATAATTTAAATAAATATAAATTT +>224 +AATTGCAACAGAGACTGGAA +>225 +TTAAGTTTTAGACATAATCTATTACAA +>226 +TGAAAGGAAAAACAGGACACGGGA +>227 +AACAGGGAGATCAACAGCGTTGACA +>228 +GGACGGAGAACTGATAATGGC +>229 +TGTGTAATCTTTCTACTTCTTCTAC +>230 +TTTACCAGAGGAGTCGAGTTTTT +>231 +AAAATCGACTGCCGAAAACATTTTAA +>232 +TGCTTGGACTACATATGGTTGAGTG +>233 +TACAGACAACACATACGGACTTAA +>234 +GAAGAGGAGGAGGAGTTTGTAAG +>235 +TTACAATCTACTATTCTTTTATTA +>236 +TTATTACATCGTCCACATATAACAAAA +>237 +TTTTTAACTCATTTTACAATTAAAC +>238 +AAAACCCGGACAAACCATCGGAGGA +>239 +CAACACATGACGCGACAATTCTTG +>240 +TACACACTCATCAACCAAAGTACGTA +>241 +TTTTCATTTCTTCTTCAAATCCTTT +>242 +AACAACTGCAAACATCTACCACA +>243 +TAGTTTTAAATATTTCTTTTTTTC +>244 +AAAGACAAAAGAAATACAGGCACT +>245 +TTTGATACCTTTATACCATACCTATT +>246 +ACAGCAGGACGGTGATCA +>247 +TTCTAGCACAACACGCACACATATA +>248 +TAATCAAAAAACTCTTCATTTTTA +>249 +TAGGAACTTCATACCGGTCTC +>250 +TATTTCAGCAACAGACTAAGACTAA diff -r 000000000000 -r a8aacccd79a3 test-data/input.fastqsanger --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fastqsanger Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,1000 @@ +@HWI-49 +TAAAGCTAGATTACCAAAGCAT ++ 
+BBBA;ACB9ABCBABB@@/?BB +@HWI-119 +TGGACGGAGAACTGATAAGGGC ++ +?03@?82?B>C@B>@CC?032< +@HWI-127 +TGGACGGAGAACTGATAAGGGC ++ +A>9@>;9>:=@3A7@BB##### +@HWI-135 +TGGACGGAGAACTGATAAGGGC ++ +BB6BA5AC+BBBCC6@B>C<'< +@HWI-136 +TGGACGGAGAACTGATAAGGGC ++ +B22>A=?;7:;BB>=C@>)1(: +@HWI-166 +CGGCACATGTTGAAGTACACTCAA ++ +B<2>BCBCACC@C@3@BBCBCCCB +@HWI-187 +TGGACGGAGAACTGATAAGGGC ++ +B;>@B82:4;B?B?@BB=5:2< +@HWI-199 +GGGAGCGAGACGGGGACTCAC ++ +@?@@8?6;@AB=72;;= +@HWI-36 +ATAAAGCTAGATTACCAAAGCAT ++ +BBBB?AAAA9AABA>7@@:-9AB +@HWI-39 +AATGGCACTGGAAGAATTCACGG ++ +54=4440)@0365:83:<;5### +@HWI-227 +TGGAATGTAAAGAAGTATGGAG ++ +BB>ACCBCCC@9@A4>CA66@> +@HWI-249 +TGGAATGTAAAGAAGTATGGAG ++ +A9>?BA2@AA>5<<4A?:BB:BBA?9<;4?B>3-78 +@HWI-254 +TGGAATGTAAAGAAGTATGGAG ++ +B>A=B@1>B?47<6=A??BB;@<@BABB@CC==:0? +@HWI-332 +TGGACGGAGAACTGATAAGGGC ++ +B?8@A>B;BAB@BA?CB@A731 +@HWI-211 +TGGAATGTAAAGAAGTATGGAGA ++ +A61=BA;7B@BBA:9:B?;;@2= +@HWI-333 +TGGAATGTAAAGAAGTATGGA ++ +BA>>AB8@BB?=>@98BBA># +@HWI-355 +TGGAATGTAAAGAAGTATGGAG ++ +B>8BBA7@CBA;>A2?BB2.>8 +@HWI-282 +TGGAATGTAAAGAATTATGGAG ++ +B>8>@A7@8=>>997-8A72## +@HWI-378 +TGGAATGTAAAGAAGTATGGAG ++ +B7=;AA8=A@@5=;4>BB31:2 +@HWI-372 +AATGGCACTGGAAGAATTCACGGG ++ +BBA;@;>A@;@=?@:?B@###### +@HWI-388 +TGGAATGTAAAGAAGTATGGAG ++ +BA@?BB?BBB??A?0:BA53<> +@HWI-399 +TGGAATGTAAAGAAGTATGGAG ++ +A;*>?B9@B<+1<@<4B@7)@- +@HWI-109 +TGGACGGAGAACTGATAAGGGC ++ +BC@CBCC@CBB9@8?CCC>AA; +@HWI-421 +TGGACGGAGAACTGATAAGGGC ++ +A66?A;0<@;B?A;?BB>38-7 +@HWI-412 +TGTGATGTGACGTAGTGGAA ++ +BA=?>B<>9><09?0=656= +@HWI-432 +TGGACGGAGAACTGATAAGGGC ++ +A;;B>=;=2@?0@@>?CBA@2= +@HWI-442 +TGGACGGAGAACTGATAAGGGC ++ +BBA?=;;>B6??@07AA##### +@HWI-470 +TGGAATGTAAAGAAGTATGGAG ++ +B?3>AC>:AA?AA<)7BA;7=: +@HWI-499 +TGGACGGAGAACTGATAAGGGC ++ +BA>??=:;B=B8A>>B9=/55= +@HWI-100 +TGGACGGAGAACTGATAAGGGC ++ +A;6@@96@;?A@BBABC?3<5> +@HWI-514 +TGACTAGATCCACACTCATTAA ++ +BAABC?7@C@ABABBBBABBBB +@HWI-521 +TGGACGGAGAACTGATAAGGGC ++ +A2=A5=2=>@B;C>:7A>>:1. 
+@HWI-545 +TGGACGGAGAACTGATAAGGGC ++ +BA?BBBAABAB@B>?BB@>A>< +@HWI-549 +TGGACGGAGAACTGATAAGGGC ++ +A66<;;0<>:@4A1<>>##### +@HWI-431 +TATCACAGCCAGCTTTGAGGAG ++ +B>B?B?@A?69?38<@5902;9 +@HWI-579 +TGGAATGTAAAGAAGTATGGA ++ +BBB@A@BB>B=1>BA### +@HWI-583 +TGGACGGAGAACTGATAAGGGC ++ +BB:@BCB>>BBAB@>@BA8;00 +@HWI-587 +TAGCACCACATGATTCGGCT ++ +BA;/A;;A1BA:@BB>=4;? +@HWI-516 +TGGACGGAGAACTGATAAGGGCA ++ +B??>>AA>A9A3A69B?;64779 +@HWI-113 +TGGACGGAGAACTGATAAGGGCA ++ +BBC@@;>BCBCBB*BCC@95**C +@HWI-592 +TGGAATGTAAAGAAGTATGGAG ++ +BBBBBB?ABA;9@@2=B@)1=4 +@HWI-602 +TGGACGGAGAACTGATAAGGGC ++ +BA;>A@>=8=@?BB>B?:0906 +@HWI-608 +TGGACGGAGAACTGATAAGGGC ++ +BBC>CC9>??CAA9=B@=::36 +@HWI-590 +GGGAGCGAGACGGGGACTCACT ++ +=B=AAAA?CBAB@=A95=%:0; +@HWI-612 +CGGCACATGTTGAAGTACACTCA ++ +B?>BBBCBABCCA=7@AAABBAA +@HWI-610 +TCAGGTACCTGAAGTAGCGCGCG ++ +BB@;@2>;>?66>963####### +@HWI-629 +TGGACGGAGAACTGATAAGGGCA ++ +A68@?;6<0C39>;B4B@CB73@@;<(>BB +@HWI-648 +TGGACGGAGAACTGATAAGGGC ++ +A8=A?@9=275< +@HWI-377 +TGGACGGAGAACTGATAAGGGCAA ++ +B>5>4?A?A;>0:9<=0;;:0.88 +@HWI-658 +TGGAATGTAAAGAAGTATGGAG ++ +B>8>AB9@B?AA>=;>AA82<8 +@HWI-677 +TGGAATGTAAAGAAGTATGG ++ +A:0)2=AB<4 +@HWI-684 +TGGAATGTAAAGAAGTATGGAG ++ +BAA?BBA@BA==?>8@B?02:) +@HWI-699 +TGGACGGAGAACTGATAAGGGC ++ +B??B@>;<;>B?BB?AB>#### +@HWI-707 +TGGAATGTAAAGAAGAATGGAG ++ +BBAB9*===@7;;:(.,B??@; +@HWI-710 +TGGACGGAGAACTGATAAGGGC ++ +BA?AA?8>A=B?BA>AA?#### +@HWI-700 +TGGACGGAGAACTGATAAGGGC ++ +BCB>4BB@8=CB>130- +@HWI-780 +TGGACGGAGAACTGATAAGGGC ++ +BBAABCAA>0/29 +@HWI-766 +TGGACGGAGAACTGATAAGGGCAA ++ +A;3=C@->@B*B>@A=AB@A< +@HWI-815 +TGGAATGTAAAGAAGTATGGAG ++ +B915AB;:?=>7<>9?CA55@* +@HWI-839 +TGGACGGAGAACTGATAAGGGC ++ +A6;?A;;>A?@BB8=CB;583? +@HWI-884 +TGGAATGTAAAGAAGTATGGAG ++ +BBBABBBBBB?BB>1/@BB?BB +@HWI-897 +TGGAATGTAAAGAAGTATGGA ++ +BAA?BB?BBB>6@@=>BB70? 
+@HWI-671 +TGACTAGATCCACACTCATTAC ++ +B@BBBA;ACBBCBBBBA>ABBB +@HWI-901 +TGGACGGAGAACTGATAAGGGC ++ +B;7@@>;=@<@?A:>CB@;8:@ +@HWI-914 +TGGACGGAGAACTGATAAGGGC ++ +BBCCBCC:BBB@CCACBB<>AB +@HWI-926 +TGGAATGTAAAGAAGTATGGAG ++ +BC=AAB;BBAB@B@+7BAA7BA +@HWI-937 +TGGACGGAGAACTGATAAGGGC ++ +BBBBA4@>=;BAB=ACC@*8;< +@HWI-973 +TGGAATGTAAAGAAGTATGGAG ++ +A59>BB;@BB?>>=3;BA71:5 +@HWI-242 +TGGACGGAGAACTGATAAGGGCTT ++ +A31@<05=;>@1B>>AA@:;59A9> +@HWI-1008 +TGGACGGAGAACTGATAAGGGC ++ +A59=B=5=8@@>CA<9@=*,8> +@HWI-568 +TGGACGGAGAACTGATAAGGGCAA ++ +A88@BA;>?@A?BA0### +@HWI-911 +CTGACTAGATCCACACTCATTA ++ +BAC +@HWI-1021 +TGGACGGAGAACTGATAAGGGC ++ +B79>?B;>:?B;B@<>6?@>9>A=A=B;ACB>8;== +@HWI-1022 +GGGAGCGAGACGGGGACTCACT ++ +AA@AC=B=79ABCBA1@B236A +@HWI-1026 +TGGACGGAGAACTGATAAGGGCAAA ++ +B>?B>=AAB@?4B>>BBAAA>?A?A +@HWI-1033 +TGGAATGTAAAGAAGTATGG ++ +?43>AA;@A=>89?>=CB;>>;BBACB@CC@3:+? +@HWI-1059 +TGGACGGAGAACTGATAAGGGC ++ +BA?AABA@BBBABBBBBA980: +@HWI-1060 +TGGACGGAGAACTGATAAGGGC ++ +B?>=BCBA;BA<56<= +@HWI-1080 +TGGACGGAGAACTGATAAGGGC ++ +B@?@BCA@CA>ABA4.<7 +@HWI-1090 +CGGATGATGGTTCACAACGACC ++ +A;59A7?AA3AB>@B@B:2@@>;>>=BA>@>## +@HWI-1094 +TGGAATGTAAAGAAGTATGGAG ++ +BB:2@A:A@BA6A@=4B;5';; +@HWI-1115 +AATGGCACTGGAAGAATTCACGGG ++ +BCBAB@B@>><>@;;A<;>##### +@HWI-1120 +TGGAATGTAAAGAAGTATGGAG ++ +B67@B?2?BBA=A@09BA47?/ +@HWI-1122 +TGGACGGAGAACTGATAAGGGC ++ +B@5>;0;;B==3AB>BA=6:29 +@HWI-1127 +TGGACGGAGAACTGATAAGGGC ++ +A8BA>1### +@HWI-1128 +TGGAATGTAAAGAAGTATGGAG ++ +B??>BBC>>A9=@<5:B@36>A +@HWI-1119 +TCAGGTACCTGAAGTAGCGCGCG ++ +BB?>??A>B=(6?7######### +@HWI-1149 +TGGAATGTAAAGAAGTATGGAG ++ +BAA>@BB@B?<8?B??BAB;;BB?2### +@HWI-1159 +TGGACGGAGAACTGATAAGGGC ++ +BBBCBA<@;;BBBA@B@>36)? 
+@HWI-1175 +TGGACGGAGAACTGATAAGGG ++ +A0)>B@69;4>@BB>BAA=A: +@HWI-1174 +TGGACGGAGAACTGATAAGGGCAA ++ +B?>@A6@;@>@:?0:C@:###### +@HWI-1181 +TGGACGGAGAACTGATAAGGGC ++ +BCACC?BAABB?CBACCCAA?B +@HWI-1196 +TGGACGGAGAACTGATAAGGGC ++ +B>ABBC=>;=BAABB?BB@<81 +@HWI-1197 +TGGACGGAGAACTGATAAGGGCA ++ +BBBB>?A>;@B;BB@B@909879 +@HWI-1201 +TGACTAGATCCACACTCATTAA ++ +A>CB@A?BBBBCBB>BB@CBA? +@HWI-1206 +TGGACGGAGAACTGATAAGGGC ++ +B?ACBCC@BBBB@0?CB@;2;< +@HWI-985 +AATGGCACTGGAAGAATTCACGGG ++ +BB@6@A>@:.:>@96A######## +@HWI-1236 +TGGACGGAGAACTGATAAGG ++ +BA?AABBB>>BB@.. +@HWI-1245 +TGGAATGTAAAGAAGTATGGAGA ++ +BCBBBCB<=:?78/5ABBAAB9B +@HWI-1266 +GGCGAACATGGATCTAGTGCACG ++ +?ABAABCB@=@?A?=:0>3=>## +@HWI-1228 +TGGACGGAGAACTGATAAGGG ++ +<@:B@==?@>A@A>;BB?)06 +@HWI-1275 +TAAAGCTAGATTACCAAAGCAT ++ +BBBBBACA:BCBCBBBB@7;BB +@HWI-1279 +TGGAATGTAAAGAAGTATGGAG ++ +BBBBABBABA@8=ABAAB>4>2 +@HWI-1282 +TGGAATGTAAAGAAGTATGGAG ++ +B?8A@-5BB@:>= +@HWI-1312 +TGGACGGAGAACTGATAAGGGC ++ +B:2 +@HWI-1317 +TGTGATGTGACGTAGTGGAA ++ +BB?B?BB?@<:0<;9>=<<: +@HWI-1323 +TGGACGGAGAACTGATAAGGGC ++ +BAACB@@@BBBBCCBCC?;>5> +@HWI-1304 +TGTGATGTGACGTAGTGGAA ++ +A92@?@1@7?:)9?>>708@ +@HWI-1337 +TGGACGGAGAACTGATAAGGGC ++ +B?BCA6A@BBC@CB>BCB;=;A +@HWI-1344 +TGGAATGTAAAGAAGTATGGAG ++ +A869>@9@475;8 +@HWI-1418 +TGGAATGTAAAGAAGTATGGAGT ++ +B?B@BCBBBC@8@@6=B>//>7A +@HWI-1434 +TCAGGTACCTGAAGTAGCG ++ +B?6B>180?=)9AA3@### +@HWI-1332 +TATCACAGCCATTTTGACGAGTT ++ +BCB@BABB=BBBBBA58;8>;@B +@HWI-1447 +TGGACGGAGAACTGATAAGGGC ++ +BBBB6CBBCABBBCBCCC5A9= +@HWI-1449 +TGGACGGAGAACTGATAAGGGC ++ +B;>@>=@?7@BA7BBC<6;### +@HWI-1451 +TCAGGTACCTGAAGTAGCGCGCG ++ +BA@;@7<6:?91=########## +@HWI-1460 +TGGAATGTAAAGAAGTATGGAG ++ +BCCBCCCBCB?:?A?>BA75;< +@HWI-1375 +TGACTAGATTCACACTCATTA ++ +BCABCB?ACC>BA39C@5BC< +@HWI-1481 +TGGAATGTAAAGAAGTATGGAG ++ +B6/9B@5ABBA??>048989<0 +@HWI-1489 +TGTGATGTGACGTAGTGGAA ++ +BAA8ABBBAB;+::1AB78B +@HWI-1505 +TGGAATGTAAAGAAGTATGGA ++ +B?)=?A9;A=9BB### +@HWI-1508 +TGTGATGTGACGTAGTGGAA ++ +B>>A:=0?::3.960:#### +@HWI-1517 
+TGGAATGTAAAGAAGTATGGAG ++ +B>>>BB>ABB=4==1=B>0)<0 +@HWI-1530 +TGGACGGAGAACTGATAAGGGCA ++ +BCBBBCB>BBBA@;?BA<>9;>:=B:A9>@>?3### +@HWI-1534 +TGGACGGAGAACTGATAAGGGC ++ +B9;BAA>=A@B?A>@BB@><4< +@HWI-1538 +TCAGGTACCTGAAGTAGCGCGCG ++ +BCCCBBBAA?):@@;;;:39### +@HWI-1547 +TGGACGGAGAACTGATAAGG ++ +A6;@@A6<>?B +@HWI-1572 +TGGAATGTAAAGAAGTATGGAG ++ +BAA@BBA@@<>?@A=9?9 +@HWI-1583 +TGGACGGAGAACTGATAAGGGC ++ +B>?B?50??@C@BABBBA66)8 +@HWI-1592 +TGGACGGAGAACTGATAAGGGC ++ +BBB>BBB;?>@@CB@BBAAA>= +@HWI-1522 +TGGACGGAGAACTGATAAGGGCAA ++ +A51BB?5?BABAA;ACA7=@;>@> +@HWI-1602 +CGGCACATGTTGAAGTACACTCAA ++ +?@;AB>C@@;=1A;4=?A?BCAC= +@HWI-1521 +TCAGGTACCTGAAGTAGCGCGCG ++ +BA?8?4>=?@>;:7;98###### +@HWI-1625 +TCAGGTACCTGAAGTAGCGCGCG ++ +BB@A@';9A;*=9?4<7994### +@HWI-1631 +TGGAATGTAAAGAAGTATGGAG ++ +B>A>BB6@BA=8<<6=BA75>8 +@HWI-1632 +TGTGATGTGACGTAGTGGAA ++ +BA??;B>><=<4=<7?:599 +@HWI-1636 +TGGACGGAGAACTGATAAGGGCA ++ +A5=AAAA +@HWI-1662 +TGGACGGAGAACTGATAAGGG ++ +A65=A@A7:B@=303 +@HWI-1666 +TGGACGGAGAACTGATAAGGGC ++ +BABCBCBBCACBA2 +@HWI-1668 +TGGAATGTAAAGAAGTATGGA ++ +B6-:@A36>B?A?7B?BBBBB@6>;8 +@HWI-1683 +TGGACGGAGAACTGATAAGGGC ++ +BAABABB?B?@;B>@BBAAA=9 +@HWI-1690 +TGTGATGTGACGTAGTGGAA ++ +A;;>?A0?9>9549)=1589 +@HWI-1702 +AATGGCACTGGAAGAATTCACGGGT ++ +@BA>@;@=:9@;=95;<@;<9'.3< +@HWI-1717 +AGGACGGGAAGGTGTCAACG ++ +AA?BAB?AB><50; +@HWI-1728 +TGGAATGTAAAGAAGTATGGAG ++ +@@6@BB;@BC@>>A7;BB=:?) +@HWI-1729 +CGAATAGCGTTGTGACTGA ++ +@B@CB@BB89ABBA?B8>? +@HWI-1733 +TGGAATGTAAAGAAGTATGGAG ++ +BAABBBB?BB>0=>8@B=/,;( +@HWI-1743 +TGTGATGTGACGTAGTGGAA ++ +BB>A9B+><<@>7;0=A><> +@HWI-1744 +TGGACGGAGAACTGATAAGGGC ++ +B@6?BB;>B?B@BA?B=:6>;A +@HWI-1754 +TCAGGTACCTGAAGTAGCGCGCG ++ +BBBA;;=1;;)940446-##### +@HWI-1640 +TGGACGGAGAACTGATAAGGG ++ +B99@AB;@4 +@HWI-1769 +TCAGGTACCTGAAGTAGCGCGCG ++ +BCBA<=BCBC?BB>5A9;6:%:. +@HWI-1773 +TGTGATGTGACGTAGTGGAAC ++ +BBAA>A>>;;73?:9?:2.34 +@HWI-1596 +TGGACGGAGAACTGATAAGGG ++ +BB@B@AA?A@B?B:>AB@943 +@HWI-1780 +TGGACGGAGAACTGATAAGGGCAA ++ +B;;BA@9?BAB?=-<@>@3:4>;? 
+@HWI-1800 +TGGAATGTAAAGAAGTATGGAG ++ +BAB?BBA@B@:)9=AAB@34?0 +@HWI-1806 +TGGAATGTAAAGAAGTATGGA ++ +BBBBCCABCCA=AA<;CB@2> +@HWI-1821 +TGGAATGTAAAGAAGTATGGAG ++ +B@8?AB;@BB>;>>;;@@:2=7 +@HWI-1766 +TGGACGGAGAACTGATAAGGGC ++ +BCACCCC?:C@7BAAACB6>2< +@HWI-1830 +TGGACGGAGAACTGATAAGGGC ++ +BA@BB>@>>@B@A5>@B@866; +@HWI-1846 +TGGACGGAGAACTGATAAGGGC ++ +BB?;5BA?@8B;>>@A9<#### +@HWI-1851 +TGGAATGTAAAGAAGTATGGA ++ +BBB@BBBABB@3??A@AB### +@HWI-1856 +TGGAATGTAAAGAAGTATGGAG ++ +B@:=@B?BBB=:>@@>BB=:>; +@HWI-1873 +TGGAATGTAAAGAAGTATGGAG ++ +BB@?BB@BBB???=>=B@2)<= +@HWI-1876 +TGGACGGAGAACTGATAAGGGC ++ +A==B@?B;?>B=72;B==/<## +@HWI-1885 +TGGAATGTAAAGAAGTATGGAG ++ +A74@BCBABAA:>1<:62;?=7928=@ +@HWI-1889 +TGGACGGAGAACTGATAAGGGC ++ +A>;@B@;@=@CAB;@BB?330: +@HWI-1894 +TAAGGAAATAGTAGCCGTGAT ++ +BAA>>######## +@HWI-1912 +TGGACGGAGAACTGATAAGGG ++ +=A4?A=)>1@?: +@HWI-1914 +TAAGGAAATAGTAGCCGTGAT ++ +BBBB7BB>9B;=AA:A>#### +@HWI-1915 +TAGGAACTTCATACCGTGCTCT ++ +BC=9@BBCCBCBCBA*?3?BAB +@HWI-1931 +TGGACGGAGAACTGATAAGGGCAA ++ +A6;@B=5@=@BBCB?B@A13)=9@ +@HWI-1940 +TGGAATGTAAAGAAGTATGGAG ++ +BC@BBBCBBAA@;?7757: +@HWI-1943 +TGTGATGTGACGTAGTGGAA ++ +BA<:7BB>>=?AA<9?>>5; +@HWI-1955 +TGGAATGTAAAGAAGTATGGAG ++ +B?;>AB;>BA???>49BA92=2 +@HWI-1884 +TGGACGGAGAACTGATAAGGGC ++ +B==A=3<=B>;8877><9?:#### +@HWI-1983 +TGGACGGAGAACTGATAAGGGC ++ +B=1@B@6:B@B@BB=BBBA@;@ +@HWI-1980 +TGGACGGAGAACTGATAAGGGC ++ +BABBB=>>6?C@BBBBA=(52@ +@HWI-1992 +TGTGATGTGACGTAGTGGAA ++ +BA@9=@7@A?<>>?:@7:9> +@HWI-1995 +CAAATTCGGTTCTAGAGAGGTT ++ +B +@HWI-2011 +ACTGAATTCTCGTGGGTCTGCAT ++ +BAB==B?B@A@8;/.(:?A>:5;A=A>?=B@@4:?@8' +@HWI-2040 +TGGACGGAGAACTGATAAGGGC ++ +BBBBB=BBB9BBB@?BB:88>= +@HWI-2049 +TGGAATGTAAAGAAGTATGGAG ++ +BA>=ABAABB>5><0;@?.-94 +@HWI-2053 +TATCACAGCCATTTTGACGAGTT ++ +BCBBCBC>A@ABCBC@@=;BCBCBAA;A?A?BAACA=805; +@HWI-2106 +TGGACGGAGAACTGATAAGGGCT ++ +A:5;BA8;A=A@?4;BA93-0:= +@HWI-2100 +TCAGGTACCTGAAGTAGCGCGCGTTAT ++ +BB@579A@B<3;=07615830,6>=## +@HWI-2072 +GGACGGAGAACTGATAAGGGCTCGG ++ +ABCBBA.3@@:A=@B=@@0<;@;.< 
+@HWI-2124 +TGGACGGAGAACTGATAAGGGC ++ +?00@B@1>?AB@>9;B?>503? +@HWI-2136 +TGGACGGAGAACTGATAAGGGC ++ +BBBBBBCABB@>BB?CBB?>>: +@HWI-2137 +TGTGATGTGACGTAGTGGAA ++ +B>=<:<-149;8>8.93379 +@HWI-2141 +TGTGATGTGACGTAGTGGAAA ++ +BB@AAB<@8>=+=@1=/5=B? +@HWI-2117 +CGGCACATGTTGAAGTACACTCA ++ +B>ABBABA>@?)=?0>CABABBB +@HWI-2146 +TGGACGGAGAACTGATAAGGGC ++ +B;;BB4=ABBCBB8@CC;/5;B +@HWI-2148 +TGGACGGAGAACTGATAAGGGC ++ +BBABA??@@AA:B@@AB@6=8= +@HWI-2151 +TGGACGGAGAACTGATAAGGGC ++ +BCA@>BCB?3ABBA?B?A7@6B +@HWI-2145 +TGTGATGTGACGTAGTGGAA ++ +B>?BBB>A>A;9<@1@559< +@HWI-2158 +AATGGCACTGGAAGAATTCACGGG ++ +BCBABBBB?50=?8;A=>9>;### +@HWI-2142 +TGGACGGAGAACTGATAAGGGC ++ +A4;@<@?=<@>@BB>BB8<0:7 +@HWI-2140 +TCAGGTACCTGAAGTAGCGCGCG ++ +BBABB?B=>=0<@>7<394:4:4 +@HWI-2191 +AATGGCACTGGAAGAATTCACGTG ++ +ABB?A@ABA;5;>8@>:;>=A>>BA:3107 +@HWI-2203 +TGGAATGTAAAGGAGTATGGAG ++ +BBABBC@?@A83?A>BBB>7?: +@HWI-2200 +TCTTTGGTATTCTAGCTGTAGA ++ +BBCBCC@BACC?CB:AB5@### +@HWI-2209 +TGGACGGAGAACTGATAAGGGC ++ +BAAA?A>>=;@;B;>@@>5/94 +@HWI-2218 +TGGAATGTAAAGAAGTATGGAG ++ +BC>BBCBBBC?A?@<>BB2);7 +@HWI-2222 +TGGACGGAGAACTGATAAGGGCA ++ +BCABBB?@BBCBBAABB@94;@@ +@HWI-2227 +TGGACGGAGAACTGATAAGGGC ++ +BBBBBBB?B@BAA;>BBA:0;> +@HWI-2225 +AATGGCACTGGAAGAATTCACGG ++ +ABBA@?B@:6>;?@9@@B=?::? 
+@HWI-2230 +TGGACGGAGAACTGATAAGGGC ++ +BBBB@CB@?>B?A;>=@>2/7; diff -r 000000000000 -r a8aacccd79a3 test-data/input.sorted.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.sorted.fa Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,500 @@ +>1 +>10 +>100 +>101 +>102 +>103 +>104 +>105 +>106 +>107 +>108 +>109 +>11 +>110 +>111 +>112 +>113 +>114 +>115 +>116 +>117 +>118 +>119 +>12 +>120 +>121 +>122 +>123 +>124 +>125 +>126 +>127 +>128 +>129 +>13 +>130 +>131 +>132 +>133 +>134 +>135 +>136 +>137 +>138 +>139 +>14 +>140 +>141 +>142 +>143 +>144 +>145 +>146 +>147 +>148 +>149 +>15 +>150 +>151 +>152 +>153 +>154 +>155 +>156 +>157 +>158 +>159 +>16 +>160 +>161 +>162 +>163 +>164 +>165 +>166 +>167 +>168 +>169 +>17 +>170 +>171 +>172 +>173 +>174 +>175 +>176 +>177 +>178 +>179 +>18 +>180 +>181 +>182 +>183 +>184 +>185 +>186 +>187 +>188 +>189 +>19 +>190 +>191 +>192 +>193 +>194 +>195 +>196 +>197 +>198 +>199 +>2 +>20 +>200 +>201 +>202 +>203 +>204 +>205 +>206 +>207 +>208 +>209 +>21 +>210 +>211 +>212 +>213 +>214 +>215 +>216 +>217 +>218 +>219 +>22 +>220 +>221 +>222 +>223 +>224 +>225 +>226 +>227 +>228 +>229 +>23 +>230 +>231 +>232 +>233 +>234 +>235 +>236 +>237 +>238 +>239 +>24 +>240 +>241 +>242 +>243 +>244 +>245 +>246 +>247 +>248 +>249 +>25 +>250 +>26 +>27 +>28 +>29 +>3 +>30 +>31 +>32 +>33 +>34 +>35 +>36 +>37 +>38 +>39 +>4 +>40 +>41 +>42 +>43 +>44 +>45 +>46 +>47 +>48 +>49 +>5 +>50 +>51 +>52 +>53 +>54 +>55 +>56 +>57 +>58 +>59 +>6 +>60 +>61 +>62 +>63 +>64 +>65 +>66 +>67 +>68 +>69 +>7 +>70 +>71 +>72 +>73 +>74 +>75 +>76 +>77 +>78 +>79 +>8 +>80 +>81 +>82 +>83 +>84 +>85 +>86 +>87 +>88 +>89 +>9 +>90 +>91 +>92 +>93 +>94 +>95 +>96 +>97 +>98 +>99 +AAAAAATATCTTTTTTAACTCGTGGCC +AAAAACACACAGACACAAGCAGCAAT +AAAACCCGGACAAACCATCGGAGGA +AAAAGGAAAAACAGAAAAATTGGG +AAAATCGACTGCCGAAAACATTTTAA +AAAGACAAAAGAAATACAGGCACT +AAATTGCAAAGATGGAAAATAAAACT +AACAACTGCAAACATCTACCACA +AACAGGAAAAACAGAAGGATTTCTA +AACAGGGAGATCAACAGCGTTGACA +AACATTTTATCAATTATACATTA +AACGGGGAATAAGGGTTCG +AACTCTCTAATTTAACTTTGTGC 
+AACTTTAAATTTTTAATAACCTT +AAGACAACAATGACATATAAGACG +AAGATGGAGTAGTTTTTT +AAGCACGCCTTACCACAATTTATAA +AAGGAATTAAAGCAATAATTCTAA +AAGTGAAGAAGTAGTTTTT +AATATAAAAATACAATCAACCATTGCA +AATCTACAATTTCCATTACGACTCC +AATGACACACTCTTCATCAAC +AATGTCACTTGAAGAATTCACGT +AATTGCAACAGAGACTGGAA +AATTTATTTAATTTATATTCTAACTAA +ACAAATCATAAATTTTTTTTTACT +ACAACCTCAACTCATATTT +ACAGCAGGACGGTGATCA +ACAGCAGGACGGTGATCA +ACCAGCACCTTCCGACTCAACGTCAAA +ACTAAACTTTTCTTACCATATTTCTA +AGAACAATTAAATAAAATAGCATA +AGATGTTGATCTAAACTCTCCCA +AGGAATATGATGAAATAAAAAAAT +AGGATTTTTAAGCCCATATGTTTCC +ATAAAGCTAGATTACCAAAGCAT +ATGAAATTCGAACAATACGTC +ATGTTATTTACTTTTTCCCCTTATA +ATTATTAATAAATTATTATAA +ATTTTACTTCATCATTTTC +CAAACGGAACAAGACATCACCATC +CAAATAACAAACTGAATAAACGAAA +CAACACATGACGCGACAATTCTTG +CAAGAATACAAAAAATACTAATTA +CAAGATATGAACAAAGCAAAGACAC +CAAGGCTCAGAAGAACATCACCAAGACC +CAATTTTTAATTCCTTTTTTCTTCTT +CACAGACTGAGGCAGAAAAAACAA +CACCGAACCGGGAAGGCGAACAAC +CACGACTTTATTCTTTTTATCTCA +CAGATTCACTGATTTTCTTACGCC +CAGTTTCACAAAAGATCTTTTAA +CATAATATAAACTTATCTT +CATACCTACAAAAAAGCTTCTCTTAC +CATACCTTAAATTATCTCTTTCTT +CATTAATTCATCCATTTAAACTAA +CCAGAAAACAATACAACATCCTCA +CCATATATGACTGACTCATTTCAC +CCCGAAAAGCCGAGGACGACTTA +CCGACCGAGCAAATAAACACAGGAACG +CCGCGATCTGCTTATTTATAATCTT +CCTTACTCAACATACTTAATCATACTTA +CGATATTTTCTCCTCGTACC +CGATATTTTCTCCTCTGACC +CGATATTTTCTCCTCTTACC +CGATATTTTCTCCTCTTACCT +CGCAACCAGCAGCAACTCCTAGCAT +CGCCGCAAGATGAATACTCTAATGA +CGGAAAAGAATGTAGACCATTTAA +CGGACGGTATATTTTTTAATATAA +CGGCACATGTTGAATTACACTCA +CTAGACAAGATGCTATAAATTTTAAA +CTATATTTTCTCTCTTACC +CTGGAAACTATTGATCAAATT +CTTCTTTTACTACATATTTTTTATTTTTTTA +GAAACAAACAACACATACCCTCTGGC +GAAACCATTATCTTATCTTTATACA +GAAAGGAAGGGAAGAAAGCGAAAGGA +GAACAATTTTTCAATTTTTTACATTA +GAAGAGGAGGAGGAGTTTGTAAG +GAATGATCGCACCACCACCTCAACGTT +GACGAAACGCAACAACAAAATGGACG +GAGAACTTTTAATCATTTTAC +GAGAATAAATATTTCAATGGTCTATTG +GAGGATTAAAAGAACGGTTTATAA +GCAGATAGAAATCAATACAAAAATC +GCCAACGACCATACCACGA +GCCGGGGCGTGAGATGTCTGCATTA +GGACGGAGAACTGATAAGGGCA +GGACGGAGAACTGATAATGGC +GGAGATTGTAGAACGAAAGGAAAAT 
+GGTATCTTTATATTTTAATTTTCTT +NATTCTTACTCCATTTCAATTTACT +TAAAAATAATTGTCTTTAATTTCA +TAAAATAAACCAAAACCCAAAAAT +TAAAATAAATAAGTCCGACGACAA +TAAAATTGTAATATTTAAATAATAT +TAAAGAAGAAGAATTGATTTTAAT +TAACAAATAGAACGTTCTAATTTAAA +TAACATAAATTTTAATCATAAATTG +TAACCTTGCAGAACTATACGATTCAAA +TAACGGAGCACGAGAACGAAGTGG +TAACTACTTTTACATTAATACTAA +TAACTTTAACTTTTTTACT +TAAGAAACTGAGCTAACGCAATGTACC +TAAGTTTTTAATCATTTTTTTT +TAATAAATTATTAAATAAAAAAAAAA +TAATAAGAAACTGTTCAAACAATCCAC +TAATAATTTAAATAAATATAAATTT +TAATCAAAAAACTCTTCATTTTTA +TAATTACCATTGCTAACTATCCA +TACAAAAAATGCGAAAATTGACCCT +TACAAAACAAACAAATTACAATCTAAA +TACAAACGGAACTTTCGTCATAA +TACAAACGGAACTTTCTTCATAACTTC +TACAAACGTAATTTTCGCATAACATC +TACAAATTTTTTTTTCTTTCTTAT +TACAACTAACATCCTTTCTTCTTCC +TACAAGACTAAAACAAACGTGAAGT +TACACACTCATCAACCAAAGGACG +TACACACTCATCAACCAAAGTACGTA +TACACAGACTTACAAAACACATCCTTC +TACACCTCTTTTTACTTTTTTATT +TACAGACAACACATACGGACTTAA +TACAGACAACACATACGGACTTAAGT +TACAGAGAAATATACAACACTCACC +TACCTCTTTATTAACCTCCACCTCTA +TACTATATACTTCTTCAAATCACA +TACTATTTTATTATACATACATACATTA +TACTCAATAGAACTCTACTCACTCATA +TACTGAAAACGGGCGCATATCAGTGG +TACTGAAACAAGGAAACACAAGC +TACTTTTTTCTTAATTTTTTATTAAAC +TAGAACTCGAACCAGAGCTCC +TAGACTTTCTACTCATTATTAC +TAGCCTTTACTAGGCTTTTTCTAA +TAGCGAGATGGACCAACGTGCTGT +TAGGAACTTCATACCGGTCTC +TAGGTACTTACCTTTTTTTTACACAA +TAGTCATACATACCTAATTATACATA +TAGTGGACTTTAAAAAAAAAAAAAAAAAA +TAGTTACCTTCATATCTCTCTTTA +TAGTTTCACTACTTTATTCTTTTTA +TAGTTTTAAATATTTCTTTTTTTC +TATAATTTTATTTTATATTTTCTCT +TATATAAATCTTCAACATCAA +TATATTGCCTCCCCATAATCCTT +TATCTATCTTTGATCTTCTTTTCA +TATCTGATCAACAATCTTTTCCCAT +TATGGAGAAACAGCGATATAAGTCA +TATGGATTATTTCAAAATTTTTTTTT +TATTATACATAGAATAACAAATCTTT +TATTATCATCTCGTTCTTCCTTCTC +TATTATTTTTTTATTCCATTCATAT +TATTCAATCACTCCATTATATATAACA +TATTCCGACAATACCTTCTTTAC +TATTTATAAATTTTTTCTTGAGAC +TATTTCACTTTATACTTCCTTAA +TATTTCAGCAACAGACTAAGACTAA +TATTTCCAACCTTCAACCTCAAATAA +TATTTCTTTTTTAACTTCTTTTC +TCAAAAAGTAATAGGGATCGTTA +TCAAAGAACAATGTAAAGCCGCGAC +TCAAATGCAAATTGGATTTATGA +TCAACTGGCAAGAATTTTTGAAAATT 
+TCAAGCCTTTTGAAGAACTGACCTAAA +TCAGAAGAACAGAGAATTGATTTT +TCAGAGTTCTACAGGTCCTACGATT +TCAGTCTTTTTTTCTCTCCTA +TCATTACACTTCTTACAAAAC +TCCACAACAACTCTATCTAAAGCATT +TCCACCTATTTATCTTTTCTT +TCCCAACCCTCGAGCATCATTTTC +TCCGAAAAATCGTAGGACCCGGGCA +TCCGAAAACAAGGCCCGTCGCT +TCCTGAGGACGAGGGGCGTTTAGC +TCGCCGTAAAGCCAGTCGTTCTCC +TCTAAACACCCACGAAAATCTCTTAC +TCTAGTCTGAGCGTAGTACCAGATTG +TCTATATTATTTTTATCAATTTTCACC +TCTATTCATACAAAACACTAATACCC +TCTATTTCTTTATTTTTTTTATTAT +TCTCATCTTACAATTTTTTAAAACTT +TCTCTTTTATTTTTATCTTTCCTT +TCTGTATTTGACTTATTACTTTCTCC +TCTTATTATCATTTTTTTATCCCTT +TCTTATTTTAATCTTCCAATTTC +TCTTCTATATAATCCTTTATTATAA +TCTTTTTTTTAATACTTATTTTCATT +TGAAACTGAAACTAACATACAAAATATT +TGAAAGGAAAAACAGGACACGGGA +TGACCAAAGACAAACAAACAATAAATA +TGAGAATGACTTCTTCACGATCTCTT +TGAGCGGAGAACCAGAGTTGATGAGC +TGAGCTAGAACTGCACCCACTCCA +TGAGGAAAACAGAAAAATGAGAGACA +TGATGACGGGCAGCAGGGATTTTC +TGATTTACTTACATTCTTTTTTT +TGCTTGGACTACATATGGTTGAGGG +TGCTTGGACTACATATGGTTGAGTG +TGCTTGGACTACATATGGTTGAGTG +TGCTTTTACTACATATTTTTTATTTTTTTA +TGGAATGTAAAGAAGTATGGAG +TGGAATGTAAAGAAGTATGGAG +TGGACAAGAACCACGCGACGGGTGT +TGGACGGAGAACTGATAAGGGC +TGGAGTAGCACAGTCGTCTGAAATC +TGTGTAATCTTTCTACTTCTTCTAC +TTAAACAATTTGGAATTAATT +TTAAAGACGCAACAACTAACATT +TTAAGTTTTAGACATAATCTATTACAA +TTAATATGTAATTTCATACCTCAC +TTAATGACACACGGGAAAAACACCG +TTACAATCTACTATTCTTTTATTA +TTACTAGATCCACCCTCATTA +TTAGTATTAATCTTCACTTAA +TTATAATCACGGCACCCTATACA +TTATTACATCGTCCACATATAACAAAA +TTATTATCTATTTTAATTTTTCTTAA +TTCAAGTAGATTGCATTTTTTAATA +TTCATTTTATTTTTAAATATCTTTTTT +TTCTAGCACAACACGCACACATATA +TTCTTCCTTTTATCCTCTCTTAA +TTCTTGATAACGCATCTTCTACAT +TTCTTGGACTACACATTTTTTATTGTTTTA +TTCTTTGACTACATATTTTTTATT +TTCTTTTACTACATATTGTTTATTTTTTTA +TTCTTTTACTACATATTTTTTATTTTT +TTCTTTTACTACATATTTTTTATTTTTTTA +TTGATTCTTCTTTTTCACAAAA +TTGTAAAACATTCTTTCTCCTGAC +TTTAAACACTTCCTACATCAAATTTC +TTTAAGACTTATGAGCTTG +TTTACCAGAGGAGTCGAGTTTTT +TTTATTACAACCCTATCTTACCTCAA +TTTATTACTTAGTCATAATTCCAA +TTTCCGACAAATACACCATCTTC +TTTGATACCTTTATACCATACCTATT +TTTGTTTTTCATTTTTTTATCTTT +TTTGTTTTTTACTATATTT 
+TTTTATATTTCCTTATATCTTTACTA +TTTTCATTTCTTCTTCAAATCCTTT +TTTTCTTTACCCATCTTTACTTTCCC +TTTTCTTTTTTTCATTTTCTCTTTTA +TTTTTAACTCATTTTACAATTAAAC +TTTTTAACTCCCATCATTTTTCCTC +TTTTTATCAATTTTCACCATTC +TTTTTTATCAATTTTCACCATTCAT +TTTTTTATCATTTTTCACCTAAAAAA +TTTTTTTTTTGTTTTTATTTTTATCAT diff -r 000000000000 -r a8aacccd79a3 test-data/output.faw --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.faw Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,494 @@ +>1_2 +ACAGCAGGACGGTGATCA +>2_2 +TGCTTGGACTACATATGGTTGAGTG +>3_2 +TGGAATGTAAAGAAGTATGGAG +>4_1 +AATTTATTTAATTTATATTCTAACTAA +>5_1 +TTATAATCACGGCACCCTATACA +>6_1 +TCCGAAAAATCGTAGGACCCGGGCA +>7_1 +TCCCAACCCTCGAGCATCATTTTC +>8_1 +TTGATTCTTCTTTTTCACAAAA +>9_1 +AGAACAATTAAATAAAATAGCATA +>10_1 +CCAGAAAACAATACAACATCCTCA +>11_1 +TCTAGTCTGAGCGTAGTACCAGATTG +>12_1 +TTTTTAACTCCCATCATTTTTCCTC +>13_1 +GCAGATAGAAATCAATACAAAAATC +>14_1 +AAGTGAAGAAGTAGTTTTT +>15_1 +AATGTCACTTGAAGAATTCACGT +>16_1 +TTTACCAGAGGAGTCGAGTTTTT +>17_1 +GGTATCTTTATATTTTAATTTTCTT +>18_1 +TTACTAGATCCACCCTCATTA +>19_1 +TATATAAATCTTCAACATCAA +>20_1 +GAAACCATTATCTTATCTTTATACA +>21_1 +GGAGATTGTAGAACGAAAGGAAAAT +>22_1 +ACTAAACTTTTCTTACCATATTTCTA +>23_1 +TCAAGCCTTTTGAAGAACTGACCTAAA +>24_1 +TAACATAAATTTTAATCATAAATTG +>25_1 +TAGCGAGATGGACCAACGTGCTGT +>26_1 +TACAAACGTAATTTTCGCATAACATC +>27_1 +CGGAAAAGAATGTAGACCATTTAA +>28_1 +GAAAGGAAGGGAAGAAAGCGAAAGGA +>29_1 +CAAGAATACAAAAAATACTAATTA +>30_1 +CATACCTACAAAAAAGCTTCTCTTAC +>31_1 +TAAAATAAATAAGTCCGACGACAA +>32_1 +AGGAATATGATGAAATAAAAAAAT +>33_1 +TTTTTTTTTTGTTTTTATTTTTATCAT +>34_1 +TTTTTAACTCATTTTACAATTAAAC +>35_1 +CCGCGATCTGCTTATTTATAATCTT +>36_1 +TAGGTACTTACCTTTTTTTTACACAA +>37_1 +TCATTACACTTCTTACAAAAC +>38_1 +TATTATACATAGAATAACAAATCTTT +>39_1 +ATGTTATTTACTTTTTCCCCTTATA +>40_1 +TCCGAAAACAAGGCCCGTCGCT +>41_1 +TGCTTTTACTACATATTTTTTATTTTTTTA +>42_1 +GAACAATTTTTCAATTTTTTACATTA +>43_1 +AACATTTTATCAATTATACATTA +>44_1 +TATCTGATCAACAATCTTTTCCCAT +>45_1 +AATTGCAACAGAGACTGGAA +>46_1 +TATTCAATCACTCCATTATATATAACA +>47_1 
+CAAACGGAACAAGACATCACCATC +>48_1 +TCTTCTATATAATCCTTTATTATAA +>49_1 +TGATGACGGGCAGCAGGGATTTTC +>50_1 +TTTATTACAACCCTATCTTACCTCAA +>51_1 +AACAGGAAAAACAGAAGGATTTCTA +>52_1 +CGATATTTTCTCCTCGTACC +>53_1 +AAATTGCAAAGATGGAAAATAAAACT +>54_1 +TACACACTCATCAACCAAAGGACG +>55_1 +TATATTGCCTCCCCATAATCCTT +>56_1 +TACAAACGGAACTTTCTTCATAACTTC +>57_1 +GGACGGAGAACTGATAATGGC +>58_1 +TTCTTTGACTACATATTTTTTATT +>59_1 +TCTTTTTTTTAATACTTATTTTCATT +>60_1 +TACTTTTTTCTTAATTTTTTATTAAAC +>61_1 +AATATAAAAATACAATCAACCATTGCA +>62_1 +CAGTTTCACAAAAGATCTTTTAA +>63_1 +GAAACAAACAACACATACCCTCTGGC +>64_1 +AATGACACACTCTTCATCAAC +>65_1 +AAGATGGAGTAGTTTTTT +>66_1 +ACAACCTCAACTCATATTT +>67_1 +TTAAACAATTTGGAATTAATT +>68_1 +TACAAAAAATGCGAAAATTGACCCT +>69_1 +GGACGGAGAACTGATAAGGGCA +>70_1 +TTAAGTTTTAGACATAATCTATTACAA +>71_1 +TTATTATCTATTTTAATTTTTCTTAA +>72_1 +CACCGAACCGGGAAGGCGAACAAC +>73_1 +TCCACCTATTTATCTTTTCTT +>74_1 +TCGCCGTAAAGCCAGTCGTTCTCC +>75_1 +TCAAAGAACAATGTAAAGCCGCGAC +>76_1 +TGGACAAGAACCACGCGACGGGTGT +>77_1 +CCCGAAAAGCCGAGGACGACTTA +>78_1 +TTTTTTATCATTTTTCACCTAAAAAA +>79_1 +TAGAACTCGAACCAGAGCTCC +>80_1 +TCTATATTATTTTTATCAATTTTCACC +>81_1 +TCTATTTCTTTATTTTTTTTATTAT +>82_1 +TTTGATACCTTTATACCATACCTATT +>83_1 +ATAAAGCTAGATTACCAAAGCAT +>84_1 +GCCAACGACCATACCACGA +>85_1 +CGGCACATGTTGAATTACACTCA +>86_1 +TACTATTTTATTATACATACATACATTA +>87_1 +TTAATGACACACGGGAAAAACACCG +>88_1 +TAGTTTCACTACTTTATTCTTTTTA +>89_1 +AACAGGGAGATCAACAGCGTTGACA +>90_1 +CGATATTTTCTCCTCTGACC +>91_1 +ACCAGCACCTTCCGACTCAACGTCAAA +>92_1 +AAGGAATTAAAGCAATAATTCTAA +>93_1 +TACAAAACAAACAAATTACAATCTAAA +>94_1 +CAATTTTTAATTCCTTTTTTCTTCTT +>95_1 +TACAGACAACACATACGGACTTAA +>96_1 +TCTGTATTTGACTTATTACTTTCTCC +>97_1 +TGAGCTAGAACTGCACCCACTCCA +>98_1 +CGCCGCAAGATGAATACTCTAATGA +>99_1 +TATTTCTTTTTTAACTTCTTTTC +>100_1 +TTACAATCTACTATTCTTTTATTA +>101_1 +TTTAAACACTTCCTACATCAAATTTC +>102_1 +TGTGTAATCTTTCTACTTCTTCTAC +>103_1 +TCTATTCATACAAAACACTAATACCC +>104_1 +TGGAGTAGCACAGTCGTCTGAAATC +>105_1 +AAGCACGCCTTACCACAATTTATAA +>106_1 +CTGGAAACTATTGATCAAATT +>107_1 
+TACACAGACTTACAAAACACATCCTTC +>108_1 +TTCAAGTAGATTGCATTTTTTAATA +>109_1 +TTATTACATCGTCCACATATAACAAAA +>110_1 +CAAGGCTCAGAAGAACATCACCAAGACC +>111_1 +TGAGGAAAACAGAAAAATGAGAGACA +>112_1 +TCAAAAAGTAATAGGGATCGTTA +>113_1 +TAACTTTAACTTTTTTACT +>114_1 +TATTCCGACAATACCTTCTTTAC +>115_1 +TTTGTTTTTTACTATATTT +>116_1 +TTCATTTTATTTTTAAATATCTTTTTT +>117_1 +TACTCAATAGAACTCTACTCACTCATA +>118_1 +TGAAAGGAAAAACAGGACACGGGA +>119_1 +AAAATCGACTGCCGAAAACATTTTAA +>120_1 +TACAGAGAAATATACAACACTCACC +>121_1 +TCAACTGGCAAGAATTTTTGAAAATT +>122_1 +GAGAACTTTTAATCATTTTAC +>123_1 +TATTATCATCTCGTTCTTCCTTCTC +>124_1 +TTTTCATTTCTTCTTCAAATCCTTT +>125_1 +TAGTCATACATACCTAATTATACATA +>126_1 +ATTTTACTTCATCATTTTC +>127_1 +TCTCTTTTATTTTTATCTTTCCTT +>128_1 +GCCGGGGCGTGAGATGTCTGCATTA +>129_1 +AGGATTTTTAAGCCCATATGTTTCC +>130_1 +CAAGATATGAACAAAGCAAAGACAC +>131_1 +CAACACATGACGCGACAATTCTTG +>132_1 +CAAATAACAAACTGAATAAACGAAA +>133_1 +TGAGAATGACTTCTTCACGATCTCTT +>134_1 +TCTTATTATCATTTTTTTATCCCTT +>135_1 +TCAAATGCAAATTGGATTTATGA +>136_1 +CCTTACTCAACATACTTAATCATACTTA +>137_1 +TAGACTTTCTACTCATTATTAC +>138_1 +TGAAACTGAAACTAACATACAAAATATT +>139_1 +AAAACCCGGACAAACCATCGGAGGA +>140_1 +TACAGACAACACATACGGACTTAAGT +>141_1 +TATTTCAGCAACAGACTAAGACTAA +>142_1 +AACTTTAAATTTTTAATAACCTT +>143_1 +TATTTATAAATTTTTTCTTGAGAC +>144_1 +TTAATATGTAATTTCATACCTCAC +>145_1 +CACAGACTGAGGCAGAAAAAACAA +>146_1 +TAAAGAAGAAGAATTGATTTTAAT +>147_1 +TACTGAAAACGGGCGCATATCAGTGG +>148_1 +TCAGTCTTTTTTTCTCTCCTA +>149_1 +TATAATTTTATTTTATATTTTCTCT +>150_1 +NATTCTTACTCCATTTCAATTTACT +>151_1 +TTGTAAAACATTCTTTCTCCTGAC +>152_1 +TAATTACCATTGCTAACTATCCA +>153_1 +TTCTTCCTTTTATCCTCTCTTAA +>154_1 +TCTAAACACCCACGAAAATCTCTTAC +>155_1 +AAAAACACACAGACACAAGCAGCAAT +>156_1 +CGGACGGTATATTTTTTAATATAA +>157_1 +TATGGAGAAACAGCGATATAAGTCA +>158_1 +TACAACTAACATCCTTTCTTCTTCC +>159_1 +AACTCTCTAATTTAACTTTGTGC +>160_1 +TCCTGAGGACGAGGGGCGTTTAGC +>161_1 +TATTTCCAACCTTCAACCTCAAATAA +>162_1 +TGGACGGAGAACTGATAAGGGC +>163_1 +TTTAAGACTTATGAGCTTG +>164_1 +TTAAAGACGCAACAACTAACATT +>165_1 
+TAGGAACTTCATACCGGTCTC +>166_1 +CGATATTTTCTCCTCTTACC +>167_1 +GAGGATTAAAAGAACGGTTTATAA +>168_1 +GAATGATCGCACCACCACCTCAACGTT +>169_1 +TTTTCTTTACCCATCTTTACTTTCCC +>170_1 +AAGACAACAATGACATATAAGACG +>171_1 +TAATAATTTAAATAAATATAAATTT +>172_1 +TACTGAAACAAGGAAACACAAGC +>173_1 +TCAGAAGAACAGAGAATTGATTTT +>174_1 +CATACCTTAAATTATCTCTTTCTT +>175_1 +TTCTTTTACTACATATTTTTTATTTTT +>176_1 +AAAAAATATCTTTTTTAACTCGTGGCC +>177_1 +TAACAAATAGAACGTTCTAATTTAAA +>178_1 +TAGTTACCTTCATATCTCTCTTTA +>179_1 +TAAAATTGTAATATTTAAATAATAT +>180_1 +AAAAGGAAAAACAGAAAAATTGGG +>181_1 +AGATGTTGATCTAAACTCTCCCA +>182_1 +TACCTCTTTATTAACCTCCACCTCTA +>183_1 +TTTCCGACAAATACACCATCTTC +>184_1 +ACAAATCATAAATTTTTTTTTACT +>185_1 +GACGAAACGCAACAACAAAATGGACG +>186_1 +TACAAATTTTTTTTTCTTTCTTAT +>187_1 +TACACCTCTTTTTACTTTTTTATT +>188_1 +TATGGATTATTTCAAAATTTTTTTTT +>189_1 +TTCTAGCACAACACGCACACATATA +>190_1 +TAACTACTTTTACATTAATACTAA +>191_1 +TCTCATCTTACAATTTTTTAAAACTT +>192_1 +TTCTTGGACTACACATTTTTTATTGTTTTA +>193_1 +TACACACTCATCAACCAAAGTACGTA +>194_1 +TACTATATACTTCTTCAAATCACA +>195_1 +TCAGAGTTCTACAGGTCCTACGATT +>196_1 +TGATTTACTTACATTCTTTTTTT +>197_1 +CCATATATGACTGACTCATTTCAC +>198_1 +GAAGAGGAGGAGGAGTTTGTAAG +>199_1 +AAAGACAAAAGAAATACAGGCACT +>200_1 +TACAAGACTAAAACAAACGTGAAGT +>201_1 +TAACGGAGCACGAGAACGAAGTGG +>202_1 +CTTCTTTTACTACATATTTTTTATTTTTTTA +>203_1 +TAATAAGAAACTGTTCAAACAATCCAC +>204_1 +TGAGCGGAGAACCAGAGTTGATGAGC +>205_1 +TATTATTTTTTTATTCCATTCATAT +>206_1 +TTTATTACTTAGTCATAATTCCAA +>207_1 +TTTTATATTTCCTTATATCTTTACTA +>208_1 +AACGGGGAATAAGGGTTCG +>209_1 +AATCTACAATTTCCATTACGACTCC +>210_1 +CCGACCGAGCAAATAAACACAGGAACG +>211_1 +TCCACAACAACTCTATCTAAAGCATT +>212_1 +TTCTTGATAACGCATCTTCTACAT +>213_1 +TGCTTGGACTACATATGGTTGAGGG +>214_1 +CAGATTCACTGATTTTCTTACGCC +>215_1 +TTTGTTTTTCATTTTTTTATCTTT +>216_1 +CTATATTTTCTCTCTTACC +>217_1 +TAACCTTGCAGAACTATACGATTCAAA +>218_1 +TAAGAAACTGAGCTAACGCAATGTACC +>219_1 +TTCTTTTACTACATATTTTTTATTTTTTTA +>220_1 +TATCTATCTTTGATCTTCTTTTCA +>221_1 +TAATAAATTATTAAATAAAAAAAAAA +>222_1 
+TTTTTTATCAATTTTCACCATTCAT +>223_1 +TATTTCACTTTATACTTCCTTAA +>224_1 +TAGTTTTAAATATTTCTTTTTTTC +>225_1 +TTCTTTTACTACATATTGTTTATTTTTTTA +>226_1 +GAGAATAAATATTTCAATGGTCTATTG +>227_1 +CGATATTTTCTCCTCTTACCT +>228_1 +CACGACTTTATTCTTTTTATCTCA +>229_1 +TAGTGGACTTTAAAAAAAAAAAAAAAAAA +>230_1 +CATAATATAAACTTATCTT +>231_1 +ATGAAATTCGAACAATACGTC +>232_1 +AACAACTGCAAACATCTACCACA +>233_1 +TAAAAATAATTGTCTTTAATTTCA +>234_1 +CGCAACCAGCAGCAACTCCTAGCAT +>235_1 +ATTATTAATAAATTATTATAA +>236_1 +CATTAATTCATCCATTTAAACTAA +>237_1 +TCTTATTTTAATCTTCCAATTTC +>238_1 +CTAGACAAGATGCTATAAATTTTAAA +>239_1 +TGACCAAAGACAAACAAACAATAAATA +>240_1 +TTTTTATCAATTTTCACCATTC +>241_1 +TAAGTTTTTAATCATTTTTTTT +>242_1 +TAATCAAAAAACTCTTCATTTTTA +>243_1 +TACAAACGGAACTTTCGTCATAA +>244_1 +TTTTCTTTTTTTCATTTTCTCTTTTA +>245_1 +TAGCCTTTACTAGGCTTTTTCTAA +>246_1 +TTAGTATTAATCTTCACTTAA +>247_1 +TAAAATAAACCAAAACCCAAAAAT diff -r 000000000000 -r a8aacccd79a3 test-data/output.sorted.faw --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.sorted.faw Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,494 @@ +>100_1 +>101_1 +>102_1 +>103_1 +>104_1 +>105_1 +>106_1 +>107_1 +>108_1 +>109_1 +>10_1 +>110_1 +>111_1 +>112_1 +>113_1 +>114_1 +>115_1 +>116_1 +>117_1 +>118_1 +>119_1 +>11_1 +>120_1 +>121_1 +>122_1 +>123_1 +>124_1 +>125_1 +>126_1 +>127_1 +>128_1 +>129_1 +>12_1 +>130_1 +>131_1 +>132_1 +>133_1 +>134_1 +>135_1 +>136_1 +>137_1 +>138_1 +>139_1 +>13_1 +>140_1 +>141_1 +>142_1 +>143_1 +>144_1 +>145_1 +>146_1 +>147_1 +>148_1 +>149_1 +>14_1 +>150_1 +>151_1 +>152_1 +>153_1 +>154_1 +>155_1 +>156_1 +>157_1 +>158_1 +>159_1 +>15_1 +>160_1 +>161_1 +>162_1 +>163_1 +>164_1 +>165_1 +>166_1 +>167_1 +>168_1 +>169_1 +>16_1 +>170_1 +>171_1 +>172_1 +>173_1 +>174_1 +>175_1 +>176_1 +>177_1 +>178_1 +>179_1 +>17_1 +>180_1 +>181_1 +>182_1 +>183_1 +>184_1 +>185_1 +>186_1 +>187_1 +>188_1 +>189_1 +>18_1 +>190_1 +>191_1 +>192_1 +>193_1 +>194_1 +>195_1 +>196_1 +>197_1 +>198_1 +>199_1 +>19_1 +>1_2 +>200_1 +>201_1 +>202_1 +>203_1 +>204_1 +>205_1 +>206_1 
+>207_1 +>208_1 +>209_1 +>20_1 +>210_1 +>211_1 +>212_1 +>213_1 +>214_1 +>215_1 +>216_1 +>217_1 +>218_1 +>219_1 +>21_1 +>220_1 +>221_1 +>222_1 +>223_1 +>224_1 +>225_1 +>226_1 +>227_1 +>228_1 +>229_1 +>22_1 +>230_1 +>231_1 +>232_1 +>233_1 +>234_1 +>235_1 +>236_1 +>237_1 +>238_1 +>239_1 +>23_1 +>240_1 +>241_1 +>242_1 +>243_1 +>244_1 +>245_1 +>246_1 +>247_1 +>24_1 +>25_1 +>26_1 +>27_1 +>28_1 +>29_1 +>2_2 +>30_1 +>31_1 +>32_1 +>33_1 +>34_1 +>35_1 +>36_1 +>37_1 +>38_1 +>39_1 +>3_2 +>40_1 +>41_1 +>42_1 +>43_1 +>44_1 +>45_1 +>46_1 +>47_1 +>48_1 +>49_1 +>4_1 +>50_1 +>51_1 +>52_1 +>53_1 +>54_1 +>55_1 +>56_1 +>57_1 +>58_1 +>59_1 +>5_1 +>60_1 +>61_1 +>62_1 +>63_1 +>64_1 +>65_1 +>66_1 +>67_1 +>68_1 +>69_1 +>6_1 +>70_1 +>71_1 +>72_1 +>73_1 +>74_1 +>75_1 +>76_1 +>77_1 +>78_1 +>79_1 +>7_1 +>80_1 +>81_1 +>82_1 +>83_1 +>84_1 +>85_1 +>86_1 +>87_1 +>88_1 +>89_1 +>8_1 +>90_1 +>91_1 +>92_1 +>93_1 +>94_1 +>95_1 +>96_1 +>97_1 +>98_1 +>99_1 +>9_1 +AAAAAATATCTTTTTTAACTCGTGGCC +AAAAACACACAGACACAAGCAGCAAT +AAAACCCGGACAAACCATCGGAGGA +AAAAGGAAAAACAGAAAAATTGGG +AAAATCGACTGCCGAAAACATTTTAA +AAAGACAAAAGAAATACAGGCACT +AAATTGCAAAGATGGAAAATAAAACT +AACAACTGCAAACATCTACCACA +AACAGGAAAAACAGAAGGATTTCTA +AACAGGGAGATCAACAGCGTTGACA +AACATTTTATCAATTATACATTA +AACGGGGAATAAGGGTTCG +AACTCTCTAATTTAACTTTGTGC +AACTTTAAATTTTTAATAACCTT +AAGACAACAATGACATATAAGACG +AAGATGGAGTAGTTTTTT +AAGCACGCCTTACCACAATTTATAA +AAGGAATTAAAGCAATAATTCTAA +AAGTGAAGAAGTAGTTTTT +AATATAAAAATACAATCAACCATTGCA +AATCTACAATTTCCATTACGACTCC +AATGACACACTCTTCATCAAC +AATGTCACTTGAAGAATTCACGT +AATTGCAACAGAGACTGGAA +AATTTATTTAATTTATATTCTAACTAA +ACAAATCATAAATTTTTTTTTACT +ACAACCTCAACTCATATTT +ACAGCAGGACGGTGATCA +ACCAGCACCTTCCGACTCAACGTCAAA +ACTAAACTTTTCTTACCATATTTCTA +AGAACAATTAAATAAAATAGCATA +AGATGTTGATCTAAACTCTCCCA +AGGAATATGATGAAATAAAAAAAT +AGGATTTTTAAGCCCATATGTTTCC +ATAAAGCTAGATTACCAAAGCAT +ATGAAATTCGAACAATACGTC +ATGTTATTTACTTTTTCCCCTTATA +ATTATTAATAAATTATTATAA +ATTTTACTTCATCATTTTC +CAAACGGAACAAGACATCACCATC +CAAATAACAAACTGAATAAACGAAA 
+CAACACATGACGCGACAATTCTTG +CAAGAATACAAAAAATACTAATTA +CAAGATATGAACAAAGCAAAGACAC +CAAGGCTCAGAAGAACATCACCAAGACC +CAATTTTTAATTCCTTTTTTCTTCTT +CACAGACTGAGGCAGAAAAAACAA +CACCGAACCGGGAAGGCGAACAAC +CACGACTTTATTCTTTTTATCTCA +CAGATTCACTGATTTTCTTACGCC +CAGTTTCACAAAAGATCTTTTAA +CATAATATAAACTTATCTT +CATACCTACAAAAAAGCTTCTCTTAC +CATACCTTAAATTATCTCTTTCTT +CATTAATTCATCCATTTAAACTAA +CCAGAAAACAATACAACATCCTCA +CCATATATGACTGACTCATTTCAC +CCCGAAAAGCCGAGGACGACTTA +CCGACCGAGCAAATAAACACAGGAACG +CCGCGATCTGCTTATTTATAATCTT +CCTTACTCAACATACTTAATCATACTTA +CGATATTTTCTCCTCGTACC +CGATATTTTCTCCTCTGACC +CGATATTTTCTCCTCTTACC +CGATATTTTCTCCTCTTACCT +CGCAACCAGCAGCAACTCCTAGCAT +CGCCGCAAGATGAATACTCTAATGA +CGGAAAAGAATGTAGACCATTTAA +CGGACGGTATATTTTTTAATATAA +CGGCACATGTTGAATTACACTCA +CTAGACAAGATGCTATAAATTTTAAA +CTATATTTTCTCTCTTACC +CTGGAAACTATTGATCAAATT +CTTCTTTTACTACATATTTTTTATTTTTTTA +GAAACAAACAACACATACCCTCTGGC +GAAACCATTATCTTATCTTTATACA +GAAAGGAAGGGAAGAAAGCGAAAGGA +GAACAATTTTTCAATTTTTTACATTA +GAAGAGGAGGAGGAGTTTGTAAG +GAATGATCGCACCACCACCTCAACGTT +GACGAAACGCAACAACAAAATGGACG +GAGAACTTTTAATCATTTTAC +GAGAATAAATATTTCAATGGTCTATTG +GAGGATTAAAAGAACGGTTTATAA +GCAGATAGAAATCAATACAAAAATC +GCCAACGACCATACCACGA +GCCGGGGCGTGAGATGTCTGCATTA +GGACGGAGAACTGATAAGGGCA +GGACGGAGAACTGATAATGGC +GGAGATTGTAGAACGAAAGGAAAAT +GGTATCTTTATATTTTAATTTTCTT +NATTCTTACTCCATTTCAATTTACT +TAAAAATAATTGTCTTTAATTTCA +TAAAATAAACCAAAACCCAAAAAT +TAAAATAAATAAGTCCGACGACAA +TAAAATTGTAATATTTAAATAATAT +TAAAGAAGAAGAATTGATTTTAAT +TAACAAATAGAACGTTCTAATTTAAA +TAACATAAATTTTAATCATAAATTG +TAACCTTGCAGAACTATACGATTCAAA +TAACGGAGCACGAGAACGAAGTGG +TAACTACTTTTACATTAATACTAA +TAACTTTAACTTTTTTACT +TAAGAAACTGAGCTAACGCAATGTACC +TAAGTTTTTAATCATTTTTTTT +TAATAAATTATTAAATAAAAAAAAAA +TAATAAGAAACTGTTCAAACAATCCAC +TAATAATTTAAATAAATATAAATTT +TAATCAAAAAACTCTTCATTTTTA +TAATTACCATTGCTAACTATCCA +TACAAAAAATGCGAAAATTGACCCT +TACAAAACAAACAAATTACAATCTAAA +TACAAACGGAACTTTCGTCATAA +TACAAACGGAACTTTCTTCATAACTTC +TACAAACGTAATTTTCGCATAACATC +TACAAATTTTTTTTTCTTTCTTAT +TACAACTAACATCCTTTCTTCTTCC 
+TACAAGACTAAAACAAACGTGAAGT +TACACACTCATCAACCAAAGGACG +TACACACTCATCAACCAAAGTACGTA +TACACAGACTTACAAAACACATCCTTC +TACACCTCTTTTTACTTTTTTATT +TACAGACAACACATACGGACTTAA +TACAGACAACACATACGGACTTAAGT +TACAGAGAAATATACAACACTCACC +TACCTCTTTATTAACCTCCACCTCTA +TACTATATACTTCTTCAAATCACA +TACTATTTTATTATACATACATACATTA +TACTCAATAGAACTCTACTCACTCATA +TACTGAAAACGGGCGCATATCAGTGG +TACTGAAACAAGGAAACACAAGC +TACTTTTTTCTTAATTTTTTATTAAAC +TAGAACTCGAACCAGAGCTCC +TAGACTTTCTACTCATTATTAC +TAGCCTTTACTAGGCTTTTTCTAA +TAGCGAGATGGACCAACGTGCTGT +TAGGAACTTCATACCGGTCTC +TAGGTACTTACCTTTTTTTTACACAA +TAGTCATACATACCTAATTATACATA +TAGTGGACTTTAAAAAAAAAAAAAAAAAA +TAGTTACCTTCATATCTCTCTTTA +TAGTTTCACTACTTTATTCTTTTTA +TAGTTTTAAATATTTCTTTTTTTC +TATAATTTTATTTTATATTTTCTCT +TATATAAATCTTCAACATCAA +TATATTGCCTCCCCATAATCCTT +TATCTATCTTTGATCTTCTTTTCA +TATCTGATCAACAATCTTTTCCCAT +TATGGAGAAACAGCGATATAAGTCA +TATGGATTATTTCAAAATTTTTTTTT +TATTATACATAGAATAACAAATCTTT +TATTATCATCTCGTTCTTCCTTCTC +TATTATTTTTTTATTCCATTCATAT +TATTCAATCACTCCATTATATATAACA +TATTCCGACAATACCTTCTTTAC +TATTTATAAATTTTTTCTTGAGAC +TATTTCACTTTATACTTCCTTAA +TATTTCAGCAACAGACTAAGACTAA +TATTTCCAACCTTCAACCTCAAATAA +TATTTCTTTTTTAACTTCTTTTC +TCAAAAAGTAATAGGGATCGTTA +TCAAAGAACAATGTAAAGCCGCGAC +TCAAATGCAAATTGGATTTATGA +TCAACTGGCAAGAATTTTTGAAAATT +TCAAGCCTTTTGAAGAACTGACCTAAA +TCAGAAGAACAGAGAATTGATTTT +TCAGAGTTCTACAGGTCCTACGATT +TCAGTCTTTTTTTCTCTCCTA +TCATTACACTTCTTACAAAAC +TCCACAACAACTCTATCTAAAGCATT +TCCACCTATTTATCTTTTCTT +TCCCAACCCTCGAGCATCATTTTC +TCCGAAAAATCGTAGGACCCGGGCA +TCCGAAAACAAGGCCCGTCGCT +TCCTGAGGACGAGGGGCGTTTAGC +TCGCCGTAAAGCCAGTCGTTCTCC +TCTAAACACCCACGAAAATCTCTTAC +TCTAGTCTGAGCGTAGTACCAGATTG +TCTATATTATTTTTATCAATTTTCACC +TCTATTCATACAAAACACTAATACCC +TCTATTTCTTTATTTTTTTTATTAT +TCTCATCTTACAATTTTTTAAAACTT +TCTCTTTTATTTTTATCTTTCCTT +TCTGTATTTGACTTATTACTTTCTCC +TCTTATTATCATTTTTTTATCCCTT +TCTTATTTTAATCTTCCAATTTC +TCTTCTATATAATCCTTTATTATAA +TCTTTTTTTTAATACTTATTTTCATT +TGAAACTGAAACTAACATACAAAATATT +TGAAAGGAAAAACAGGACACGGGA +TGACCAAAGACAAACAAACAATAAATA 
+TGAGAATGACTTCTTCACGATCTCTT +TGAGCGGAGAACCAGAGTTGATGAGC +TGAGCTAGAACTGCACCCACTCCA +TGAGGAAAACAGAAAAATGAGAGACA +TGATGACGGGCAGCAGGGATTTTC +TGATTTACTTACATTCTTTTTTT +TGCTTGGACTACATATGGTTGAGGG +TGCTTGGACTACATATGGTTGAGTG +TGCTTTTACTACATATTTTTTATTTTTTTA +TGGAATGTAAAGAAGTATGGAG +TGGACAAGAACCACGCGACGGGTGT +TGGACGGAGAACTGATAAGGGC +TGGAGTAGCACAGTCGTCTGAAATC +TGTGTAATCTTTCTACTTCTTCTAC +TTAAACAATTTGGAATTAATT +TTAAAGACGCAACAACTAACATT +TTAAGTTTTAGACATAATCTATTACAA +TTAATATGTAATTTCATACCTCAC +TTAATGACACACGGGAAAAACACCG +TTACAATCTACTATTCTTTTATTA +TTACTAGATCCACCCTCATTA +TTAGTATTAATCTTCACTTAA +TTATAATCACGGCACCCTATACA +TTATTACATCGTCCACATATAACAAAA +TTATTATCTATTTTAATTTTTCTTAA +TTCAAGTAGATTGCATTTTTTAATA +TTCATTTTATTTTTAAATATCTTTTTT +TTCTAGCACAACACGCACACATATA +TTCTTCCTTTTATCCTCTCTTAA +TTCTTGATAACGCATCTTCTACAT +TTCTTGGACTACACATTTTTTATTGTTTTA +TTCTTTGACTACATATTTTTTATT +TTCTTTTACTACATATTGTTTATTTTTTTA +TTCTTTTACTACATATTTTTTATTTTT +TTCTTTTACTACATATTTTTTATTTTTTTA +TTGATTCTTCTTTTTCACAAAA +TTGTAAAACATTCTTTCTCCTGAC +TTTAAACACTTCCTACATCAAATTTC +TTTAAGACTTATGAGCTTG +TTTACCAGAGGAGTCGAGTTTTT +TTTATTACAACCCTATCTTACCTCAA +TTTATTACTTAGTCATAATTCCAA +TTTCCGACAAATACACCATCTTC +TTTGATACCTTTATACCATACCTATT +TTTGTTTTTCATTTTTTTATCTTT +TTTGTTTTTTACTATATTT +TTTTATATTTCCTTATATCTTTACTA +TTTTCATTTCTTCTTCAAATCCTTT +TTTTCTTTACCCATCTTTACTTTCCC +TTTTCTTTTTTTCATTTTCTCTTTTA +TTTTTAACTCATTTTACAATTAAAC +TTTTTAACTCCCATCATTTTTCCTC +TTTTTATCAATTTTCACCATTC +TTTTTTATCAATTTTCACCATTCAT +TTTTTTATCATTTTTCACCTAAAAAA +TTTTTTTTTTGTTTTTATTTTTATCAT diff -r 000000000000 -r a8aacccd79a3 test-data/output.sorted.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.sorted.tab Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,247 @@ +AAAAAATATCTTTTTTAACTCGTGGCC 1 +AAAAACACACAGACACAAGCAGCAAT 1 +AAAACCCGGACAAACCATCGGAGGA 1 +AAAAGGAAAAACAGAAAAATTGGG 1 +AAAATCGACTGCCGAAAACATTTTAA 1 +AAAGACAAAAGAAATACAGGCACT 1 +AAATTGCAAAGATGGAAAATAAAACT 1 +AACAACTGCAAACATCTACCACA 1 +AACAGGAAAAACAGAAGGATTTCTA 1 +AACAGGGAGATCAACAGCGTTGACA 1 
+AACATTTTATCAATTATACATTA 1 +AACGGGGAATAAGGGTTCG 1 +AACTCTCTAATTTAACTTTGTGC 1 +AACTTTAAATTTTTAATAACCTT 1 +AAGACAACAATGACATATAAGACG 1 +AAGATGGAGTAGTTTTTT 1 +AAGCACGCCTTACCACAATTTATAA 1 +AAGGAATTAAAGCAATAATTCTAA 1 +AAGTGAAGAAGTAGTTTTT 1 +AATATAAAAATACAATCAACCATTGCA 1 +AATCTACAATTTCCATTACGACTCC 1 +AATGACACACTCTTCATCAAC 1 +AATGTCACTTGAAGAATTCACGT 1 +AATTGCAACAGAGACTGGAA 1 +AATTTATTTAATTTATATTCTAACTAA 1 +ACAAATCATAAATTTTTTTTTACT 1 +ACAACCTCAACTCATATTT 1 +ACAGCAGGACGGTGATCA 2 +ACCAGCACCTTCCGACTCAACGTCAAA 1 +ACTAAACTTTTCTTACCATATTTCTA 1 +AGAACAATTAAATAAAATAGCATA 1 +AGATGTTGATCTAAACTCTCCCA 1 +AGGAATATGATGAAATAAAAAAAT 1 +AGGATTTTTAAGCCCATATGTTTCC 1 +ATAAAGCTAGATTACCAAAGCAT 1 +ATGAAATTCGAACAATACGTC 1 +ATGTTATTTACTTTTTCCCCTTATA 1 +ATTATTAATAAATTATTATAA 1 +ATTTTACTTCATCATTTTC 1 +CAAACGGAACAAGACATCACCATC 1 +CAAATAACAAACTGAATAAACGAAA 1 +CAACACATGACGCGACAATTCTTG 1 +CAAGAATACAAAAAATACTAATTA 1 +CAAGATATGAACAAAGCAAAGACAC 1 +CAAGGCTCAGAAGAACATCACCAAGACC 1 +CAATTTTTAATTCCTTTTTTCTTCTT 1 +CACAGACTGAGGCAGAAAAAACAA 1 +CACCGAACCGGGAAGGCGAACAAC 1 +CACGACTTTATTCTTTTTATCTCA 1 +CAGATTCACTGATTTTCTTACGCC 1 +CAGTTTCACAAAAGATCTTTTAA 1 +CATAATATAAACTTATCTT 1 +CATACCTACAAAAAAGCTTCTCTTAC 1 +CATACCTTAAATTATCTCTTTCTT 1 +CATTAATTCATCCATTTAAACTAA 1 +CCAGAAAACAATACAACATCCTCA 1 +CCATATATGACTGACTCATTTCAC 1 +CCCGAAAAGCCGAGGACGACTTA 1 +CCGACCGAGCAAATAAACACAGGAACG 1 +CCGCGATCTGCTTATTTATAATCTT 1 +CCTTACTCAACATACTTAATCATACTTA 1 +CGATATTTTCTCCTCGTACC 1 +CGATATTTTCTCCTCTGACC 1 +CGATATTTTCTCCTCTTACC 1 +CGATATTTTCTCCTCTTACCT 1 +CGCAACCAGCAGCAACTCCTAGCAT 1 +CGCCGCAAGATGAATACTCTAATGA 1 +CGGAAAAGAATGTAGACCATTTAA 1 +CGGACGGTATATTTTTTAATATAA 1 +CGGCACATGTTGAATTACACTCA 1 +CTAGACAAGATGCTATAAATTTTAAA 1 +CTATATTTTCTCTCTTACC 1 +CTGGAAACTATTGATCAAATT 1 +CTTCTTTTACTACATATTTTTTATTTTTTTA 1 +GAAACAAACAACACATACCCTCTGGC 1 +GAAACCATTATCTTATCTTTATACA 1 +GAAAGGAAGGGAAGAAAGCGAAAGGA 1 +GAACAATTTTTCAATTTTTTACATTA 1 +GAAGAGGAGGAGGAGTTTGTAAG 1 +GAATGATCGCACCACCACCTCAACGTT 1 +GACGAAACGCAACAACAAAATGGACG 1 +GAGAACTTTTAATCATTTTAC 1 
+GAGAATAAATATTTCAATGGTCTATTG 1 +GAGGATTAAAAGAACGGTTTATAA 1 +GCAGATAGAAATCAATACAAAAATC 1 +GCCAACGACCATACCACGA 1 +GCCGGGGCGTGAGATGTCTGCATTA 1 +GGACGGAGAACTGATAAGGGCA 1 +GGACGGAGAACTGATAATGGC 1 +GGAGATTGTAGAACGAAAGGAAAAT 1 +GGTATCTTTATATTTTAATTTTCTT 1 +NATTCTTACTCCATTTCAATTTACT 1 +TAAAAATAATTGTCTTTAATTTCA 1 +TAAAATAAACCAAAACCCAAAAAT 1 +TAAAATAAATAAGTCCGACGACAA 1 +TAAAATTGTAATATTTAAATAATAT 1 +TAAAGAAGAAGAATTGATTTTAAT 1 +TAACAAATAGAACGTTCTAATTTAAA 1 +TAACATAAATTTTAATCATAAATTG 1 +TAACCTTGCAGAACTATACGATTCAAA 1 +TAACGGAGCACGAGAACGAAGTGG 1 +TAACTACTTTTACATTAATACTAA 1 +TAACTTTAACTTTTTTACT 1 +TAAGAAACTGAGCTAACGCAATGTACC 1 +TAAGTTTTTAATCATTTTTTTT 1 +TAATAAATTATTAAATAAAAAAAAAA 1 +TAATAAGAAACTGTTCAAACAATCCAC 1 +TAATAATTTAAATAAATATAAATTT 1 +TAATCAAAAAACTCTTCATTTTTA 1 +TAATTACCATTGCTAACTATCCA 1 +TACAAAAAATGCGAAAATTGACCCT 1 +TACAAAACAAACAAATTACAATCTAAA 1 +TACAAACGGAACTTTCGTCATAA 1 +TACAAACGGAACTTTCTTCATAACTTC 1 +TACAAACGTAATTTTCGCATAACATC 1 +TACAAATTTTTTTTTCTTTCTTAT 1 +TACAACTAACATCCTTTCTTCTTCC 1 +TACAAGACTAAAACAAACGTGAAGT 1 +TACACACTCATCAACCAAAGGACG 1 +TACACACTCATCAACCAAAGTACGTA 1 +TACACAGACTTACAAAACACATCCTTC 1 +TACACCTCTTTTTACTTTTTTATT 1 +TACAGACAACACATACGGACTTAA 1 +TACAGACAACACATACGGACTTAAGT 1 +TACAGAGAAATATACAACACTCACC 1 +TACCTCTTTATTAACCTCCACCTCTA 1 +TACTATATACTTCTTCAAATCACA 1 +TACTATTTTATTATACATACATACATTA 1 +TACTCAATAGAACTCTACTCACTCATA 1 +TACTGAAAACGGGCGCATATCAGTGG 1 +TACTGAAACAAGGAAACACAAGC 1 +TACTTTTTTCTTAATTTTTTATTAAAC 1 +TAGAACTCGAACCAGAGCTCC 1 +TAGACTTTCTACTCATTATTAC 1 +TAGCCTTTACTAGGCTTTTTCTAA 1 +TAGCGAGATGGACCAACGTGCTGT 1 +TAGGAACTTCATACCGGTCTC 1 +TAGGTACTTACCTTTTTTTTACACAA 1 +TAGTCATACATACCTAATTATACATA 1 +TAGTGGACTTTAAAAAAAAAAAAAAAAAA 1 +TAGTTACCTTCATATCTCTCTTTA 1 +TAGTTTCACTACTTTATTCTTTTTA 1 +TAGTTTTAAATATTTCTTTTTTTC 1 +TATAATTTTATTTTATATTTTCTCT 1 +TATATAAATCTTCAACATCAA 1 +TATATTGCCTCCCCATAATCCTT 1 +TATCTATCTTTGATCTTCTTTTCA 1 +TATCTGATCAACAATCTTTTCCCAT 1 +TATGGAGAAACAGCGATATAAGTCA 1 +TATGGATTATTTCAAAATTTTTTTTT 1 +TATTATACATAGAATAACAAATCTTT 1 
+TATTATCATCTCGTTCTTCCTTCTC 1 +TATTATTTTTTTATTCCATTCATAT 1 +TATTCAATCACTCCATTATATATAACA 1 +TATTCCGACAATACCTTCTTTAC 1 +TATTTATAAATTTTTTCTTGAGAC 1 +TATTTCACTTTATACTTCCTTAA 1 +TATTTCAGCAACAGACTAAGACTAA 1 +TATTTCCAACCTTCAACCTCAAATAA 1 +TATTTCTTTTTTAACTTCTTTTC 1 +TCAAAAAGTAATAGGGATCGTTA 1 +TCAAAGAACAATGTAAAGCCGCGAC 1 +TCAAATGCAAATTGGATTTATGA 1 +TCAACTGGCAAGAATTTTTGAAAATT 1 +TCAAGCCTTTTGAAGAACTGACCTAAA 1 +TCAGAAGAACAGAGAATTGATTTT 1 +TCAGAGTTCTACAGGTCCTACGATT 1 +TCAGTCTTTTTTTCTCTCCTA 1 +TCATTACACTTCTTACAAAAC 1 +TCCACAACAACTCTATCTAAAGCATT 1 +TCCACCTATTTATCTTTTCTT 1 +TCCCAACCCTCGAGCATCATTTTC 1 +TCCGAAAAATCGTAGGACCCGGGCA 1 +TCCGAAAACAAGGCCCGTCGCT 1 +TCCTGAGGACGAGGGGCGTTTAGC 1 +TCGCCGTAAAGCCAGTCGTTCTCC 1 +TCTAAACACCCACGAAAATCTCTTAC 1 +TCTAGTCTGAGCGTAGTACCAGATTG 1 +TCTATATTATTTTTATCAATTTTCACC 1 +TCTATTCATACAAAACACTAATACCC 1 +TCTATTTCTTTATTTTTTTTATTAT 1 +TCTCATCTTACAATTTTTTAAAACTT 1 +TCTCTTTTATTTTTATCTTTCCTT 1 +TCTGTATTTGACTTATTACTTTCTCC 1 +TCTTATTATCATTTTTTTATCCCTT 1 +TCTTATTTTAATCTTCCAATTTC 1 +TCTTCTATATAATCCTTTATTATAA 1 +TCTTTTTTTTAATACTTATTTTCATT 1 +TGAAACTGAAACTAACATACAAAATATT 1 +TGAAAGGAAAAACAGGACACGGGA 1 +TGACCAAAGACAAACAAACAATAAATA 1 +TGAGAATGACTTCTTCACGATCTCTT 1 +TGAGCGGAGAACCAGAGTTGATGAGC 1 +TGAGCTAGAACTGCACCCACTCCA 1 +TGAGGAAAACAGAAAAATGAGAGACA 1 +TGATGACGGGCAGCAGGGATTTTC 1 +TGATTTACTTACATTCTTTTTTT 1 +TGCTTGGACTACATATGGTTGAGGG 1 +TGCTTGGACTACATATGGTTGAGTG 2 +TGCTTTTACTACATATTTTTTATTTTTTTA 1 +TGGAATGTAAAGAAGTATGGAG 2 +TGGACAAGAACCACGCGACGGGTGT 1 +TGGACGGAGAACTGATAAGGGC 1 +TGGAGTAGCACAGTCGTCTGAAATC 1 +TGTGTAATCTTTCTACTTCTTCTAC 1 +TTAAACAATTTGGAATTAATT 1 +TTAAAGACGCAACAACTAACATT 1 +TTAAGTTTTAGACATAATCTATTACAA 1 +TTAATATGTAATTTCATACCTCAC 1 +TTAATGACACACGGGAAAAACACCG 1 +TTACAATCTACTATTCTTTTATTA 1 +TTACTAGATCCACCCTCATTA 1 +TTAGTATTAATCTTCACTTAA 1 +TTATAATCACGGCACCCTATACA 1 +TTATTACATCGTCCACATATAACAAAA 1 +TTATTATCTATTTTAATTTTTCTTAA 1 +TTCAAGTAGATTGCATTTTTTAATA 1 +TTCATTTTATTTTTAAATATCTTTTTT 1 +TTCTAGCACAACACGCACACATATA 1 +TTCTTCCTTTTATCCTCTCTTAA 1 +TTCTTGATAACGCATCTTCTACAT 1 
+TTCTTGGACTACACATTTTTTATTGTTTTA 1 +TTCTTTGACTACATATTTTTTATT 1 +TTCTTTTACTACATATTGTTTATTTTTTTA 1 +TTCTTTTACTACATATTTTTTATTTTT 1 +TTCTTTTACTACATATTTTTTATTTTTTTA 1 +TTGATTCTTCTTTTTCACAAAA 1 +TTGTAAAACATTCTTTCTCCTGAC 1 +TTTAAACACTTCCTACATCAAATTTC 1 +TTTAAGACTTATGAGCTTG 1 +TTTACCAGAGGAGTCGAGTTTTT 1 +TTTATTACAACCCTATCTTACCTCAA 1 +TTTATTACTTAGTCATAATTCCAA 1 +TTTCCGACAAATACACCATCTTC 1 +TTTGATACCTTTATACCATACCTATT 1 +TTTGTTTTTCATTTTTTTATCTTT 1 +TTTGTTTTTTACTATATTT 1 +TTTTATATTTCCTTATATCTTTACTA 1 +TTTTCATTTCTTCTTCAAATCCTTT 1 +TTTTCTTTACCCATCTTTACTTTCCC 1 +TTTTCTTTTTTTCATTTTCTCTTTTA 1 +TTTTTAACTCATTTTACAATTAAAC 1 +TTTTTAACTCCCATCATTTTTCCTC 1 +TTTTTATCAATTTTCACCATTC 1 +TTTTTTATCAATTTTCACCATTCAT 1 +TTTTTTATCATTTTTCACCTAAAAAA 1 +TTTTTTTTTTGTTTTTATTTTTATCAT 1 diff -r 000000000000 -r a8aacccd79a3 test-data/output.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.tab Mon Sep 04 07:13:28 2017 -0400 @@ -0,0 +1,247 @@ +ACAGCAGGACGGTGATCA 2 +TGCTTGGACTACATATGGTTGAGTG 2 +TGGAATGTAAAGAAGTATGGAG 2 +AATTTATTTAATTTATATTCTAACTAA 1 +TTATAATCACGGCACCCTATACA 1 +TCCGAAAAATCGTAGGACCCGGGCA 1 +TCCCAACCCTCGAGCATCATTTTC 1 +TTGATTCTTCTTTTTCACAAAA 1 +AGAACAATTAAATAAAATAGCATA 1 +CCAGAAAACAATACAACATCCTCA 1 +TCTAGTCTGAGCGTAGTACCAGATTG 1 +TTTTTAACTCCCATCATTTTTCCTC 1 +GCAGATAGAAATCAATACAAAAATC 1 +AAGTGAAGAAGTAGTTTTT 1 +AATGTCACTTGAAGAATTCACGT 1 +TTTACCAGAGGAGTCGAGTTTTT 1 +GGTATCTTTATATTTTAATTTTCTT 1 +TTACTAGATCCACCCTCATTA 1 +TATATAAATCTTCAACATCAA 1 +GAAACCATTATCTTATCTTTATACA 1 +GGAGATTGTAGAACGAAAGGAAAAT 1 +ACTAAACTTTTCTTACCATATTTCTA 1 +TCAAGCCTTTTGAAGAACTGACCTAAA 1 +TAACATAAATTTTAATCATAAATTG 1 +TAGCGAGATGGACCAACGTGCTGT 1 +TACAAACGTAATTTTCGCATAACATC 1 +CGGAAAAGAATGTAGACCATTTAA 1 +GAAAGGAAGGGAAGAAAGCGAAAGGA 1 +CAAGAATACAAAAAATACTAATTA 1 +CATACCTACAAAAAAGCTTCTCTTAC 1 +TAAAATAAATAAGTCCGACGACAA 1 +AGGAATATGATGAAATAAAAAAAT 1 +TTTTTTTTTTGTTTTTATTTTTATCAT 1 +TTTTTAACTCATTTTACAATTAAAC 1 +CCGCGATCTGCTTATTTATAATCTT 1 +TAGGTACTTACCTTTTTTTTACACAA 1 +TCATTACACTTCTTACAAAAC 1 
+TATTATACATAGAATAACAAATCTTT 1 +ATGTTATTTACTTTTTCCCCTTATA 1 +TCCGAAAACAAGGCCCGTCGCT 1 +TGCTTTTACTACATATTTTTTATTTTTTTA 1 +GAACAATTTTTCAATTTTTTACATTA 1 +AACATTTTATCAATTATACATTA 1 +TATCTGATCAACAATCTTTTCCCAT 1 +AATTGCAACAGAGACTGGAA 1 +TATTCAATCACTCCATTATATATAACA 1 +CAAACGGAACAAGACATCACCATC 1 +TCTTCTATATAATCCTTTATTATAA 1 +TGATGACGGGCAGCAGGGATTTTC 1 +TTTATTACAACCCTATCTTACCTCAA 1 +AACAGGAAAAACAGAAGGATTTCTA 1 +CGATATTTTCTCCTCGTACC 1 +AAATTGCAAAGATGGAAAATAAAACT 1 +TACACACTCATCAACCAAAGGACG 1 +TATATTGCCTCCCCATAATCCTT 1 +TACAAACGGAACTTTCTTCATAACTTC 1 +GGACGGAGAACTGATAATGGC 1 +TTCTTTGACTACATATTTTTTATT 1 +TCTTTTTTTTAATACTTATTTTCATT 1 +TACTTTTTTCTTAATTTTTTATTAAAC 1 +AATATAAAAATACAATCAACCATTGCA 1 +CAGTTTCACAAAAGATCTTTTAA 1 +GAAACAAACAACACATACCCTCTGGC 1 +AATGACACACTCTTCATCAAC 1 +AAGATGGAGTAGTTTTTT 1 +ACAACCTCAACTCATATTT 1 +TTAAACAATTTGGAATTAATT 1 +TACAAAAAATGCGAAAATTGACCCT 1 +GGACGGAGAACTGATAAGGGCA 1 +TTAAGTTTTAGACATAATCTATTACAA 1 +TTATTATCTATTTTAATTTTTCTTAA 1 +CACCGAACCGGGAAGGCGAACAAC 1 +TCCACCTATTTATCTTTTCTT 1 +TCGCCGTAAAGCCAGTCGTTCTCC 1 +TCAAAGAACAATGTAAAGCCGCGAC 1 +TGGACAAGAACCACGCGACGGGTGT 1 +CCCGAAAAGCCGAGGACGACTTA 1 +TTTTTTATCATTTTTCACCTAAAAAA 1 +TAGAACTCGAACCAGAGCTCC 1 +TCTATATTATTTTTATCAATTTTCACC 1 +TCTATTTCTTTATTTTTTTTATTAT 1 +TTTGATACCTTTATACCATACCTATT 1 +ATAAAGCTAGATTACCAAAGCAT 1 +GCCAACGACCATACCACGA 1 +CGGCACATGTTGAATTACACTCA 1 +TACTATTTTATTATACATACATACATTA 1 +TTAATGACACACGGGAAAAACACCG 1 +TAGTTTCACTACTTTATTCTTTTTA 1 +AACAGGGAGATCAACAGCGTTGACA 1 +CGATATTTTCTCCTCTGACC 1 +ACCAGCACCTTCCGACTCAACGTCAAA 1 +AAGGAATTAAAGCAATAATTCTAA 1 +TACAAAACAAACAAATTACAATCTAAA 1 +CAATTTTTAATTCCTTTTTTCTTCTT 1 +TACAGACAACACATACGGACTTAA 1 +TCTGTATTTGACTTATTACTTTCTCC 1 +TGAGCTAGAACTGCACCCACTCCA 1 +CGCCGCAAGATGAATACTCTAATGA 1 +TATTTCTTTTTTAACTTCTTTTC 1 +TTACAATCTACTATTCTTTTATTA 1 +TTTAAACACTTCCTACATCAAATTTC 1 +TGTGTAATCTTTCTACTTCTTCTAC 1 +TCTATTCATACAAAACACTAATACCC 1 +TGGAGTAGCACAGTCGTCTGAAATC 1 +AAGCACGCCTTACCACAATTTATAA 1 +CTGGAAACTATTGATCAAATT 1 +TACACAGACTTACAAAACACATCCTTC 1 
+TTCAAGTAGATTGCATTTTTTAATA 1 +TTATTACATCGTCCACATATAACAAAA 1 +CAAGGCTCAGAAGAACATCACCAAGACC 1 +TGAGGAAAACAGAAAAATGAGAGACA 1 +TCAAAAAGTAATAGGGATCGTTA 1 +TAACTTTAACTTTTTTACT 1 +TATTCCGACAATACCTTCTTTAC 1 +TTTGTTTTTTACTATATTT 1 +TTCATTTTATTTTTAAATATCTTTTTT 1 +TACTCAATAGAACTCTACTCACTCATA 1 +TGAAAGGAAAAACAGGACACGGGA 1 +AAAATCGACTGCCGAAAACATTTTAA 1 +TACAGAGAAATATACAACACTCACC 1 +TCAACTGGCAAGAATTTTTGAAAATT 1 +GAGAACTTTTAATCATTTTAC 1 +TATTATCATCTCGTTCTTCCTTCTC 1 +TTTTCATTTCTTCTTCAAATCCTTT 1 +TAGTCATACATACCTAATTATACATA 1 +ATTTTACTTCATCATTTTC 1 +TCTCTTTTATTTTTATCTTTCCTT 1 +GCCGGGGCGTGAGATGTCTGCATTA 1 +AGGATTTTTAAGCCCATATGTTTCC 1 +CAAGATATGAACAAAGCAAAGACAC 1 +CAACACATGACGCGACAATTCTTG 1 +CAAATAACAAACTGAATAAACGAAA 1 +TGAGAATGACTTCTTCACGATCTCTT 1 +TCTTATTATCATTTTTTTATCCCTT 1 +TCAAATGCAAATTGGATTTATGA 1 +CCTTACTCAACATACTTAATCATACTTA 1 +TAGACTTTCTACTCATTATTAC 1 +TGAAACTGAAACTAACATACAAAATATT 1 +AAAACCCGGACAAACCATCGGAGGA 1 +TACAGACAACACATACGGACTTAAGT 1 +TATTTCAGCAACAGACTAAGACTAA 1 +AACTTTAAATTTTTAATAACCTT 1 +TATTTATAAATTTTTTCTTGAGAC 1 +TTAATATGTAATTTCATACCTCAC 1 +CACAGACTGAGGCAGAAAAAACAA 1 +TAAAGAAGAAGAATTGATTTTAAT 1 +TACTGAAAACGGGCGCATATCAGTGG 1 +TCAGTCTTTTTTTCTCTCCTA 1 +TATAATTTTATTTTATATTTTCTCT 1 +NATTCTTACTCCATTTCAATTTACT 1 +TTGTAAAACATTCTTTCTCCTGAC 1 +TAATTACCATTGCTAACTATCCA 1 +TTCTTCCTTTTATCCTCTCTTAA 1 +TCTAAACACCCACGAAAATCTCTTAC 1 +AAAAACACACAGACACAAGCAGCAAT 1 +CGGACGGTATATTTTTTAATATAA 1 +TATGGAGAAACAGCGATATAAGTCA 1 +TACAACTAACATCCTTTCTTCTTCC 1 +AACTCTCTAATTTAACTTTGTGC 1 +TCCTGAGGACGAGGGGCGTTTAGC 1 +TATTTCCAACCTTCAACCTCAAATAA 1 +TGGACGGAGAACTGATAAGGGC 1 +TTTAAGACTTATGAGCTTG 1 +TTAAAGACGCAACAACTAACATT 1 +TAGGAACTTCATACCGGTCTC 1 +CGATATTTTCTCCTCTTACC 1 +GAGGATTAAAAGAACGGTTTATAA 1 +GAATGATCGCACCACCACCTCAACGTT 1 +TTTTCTTTACCCATCTTTACTTTCCC 1 +AAGACAACAATGACATATAAGACG 1 +TAATAATTTAAATAAATATAAATTT 1 +TACTGAAACAAGGAAACACAAGC 1 +TCAGAAGAACAGAGAATTGATTTT 1 +CATACCTTAAATTATCTCTTTCTT 1 +TTCTTTTACTACATATTTTTTATTTTT 1 +AAAAAATATCTTTTTTAACTCGTGGCC 1 +TAACAAATAGAACGTTCTAATTTAAA 1 
+TAGTTACCTTCATATCTCTCTTTA 1 +TAAAATTGTAATATTTAAATAATAT 1 +AAAAGGAAAAACAGAAAAATTGGG 1 +AGATGTTGATCTAAACTCTCCCA 1 +TACCTCTTTATTAACCTCCACCTCTA 1 +TTTCCGACAAATACACCATCTTC 1 +ACAAATCATAAATTTTTTTTTACT 1 +GACGAAACGCAACAACAAAATGGACG 1 +TACAAATTTTTTTTTCTTTCTTAT 1 +TACACCTCTTTTTACTTTTTTATT 1 +TATGGATTATTTCAAAATTTTTTTTT 1 +TTCTAGCACAACACGCACACATATA 1 +TAACTACTTTTACATTAATACTAA 1 +TCTCATCTTACAATTTTTTAAAACTT 1 +TTCTTGGACTACACATTTTTTATTGTTTTA 1 +TACACACTCATCAACCAAAGTACGTA 1 +TACTATATACTTCTTCAAATCACA 1 +TCAGAGTTCTACAGGTCCTACGATT 1 +TGATTTACTTACATTCTTTTTTT 1 +CCATATATGACTGACTCATTTCAC 1 +GAAGAGGAGGAGGAGTTTGTAAG 1 +AAAGACAAAAGAAATACAGGCACT 1 +TACAAGACTAAAACAAACGTGAAGT 1 +TAACGGAGCACGAGAACGAAGTGG 1 +CTTCTTTTACTACATATTTTTTATTTTTTTA 1 +TAATAAGAAACTGTTCAAACAATCCAC 1 +TGAGCGGAGAACCAGAGTTGATGAGC 1 +TATTATTTTTTTATTCCATTCATAT 1 +TTTATTACTTAGTCATAATTCCAA 1 +TTTTATATTTCCTTATATCTTTACTA 1 +AACGGGGAATAAGGGTTCG 1 +AATCTACAATTTCCATTACGACTCC 1 +CCGACCGAGCAAATAAACACAGGAACG 1 +TCCACAACAACTCTATCTAAAGCATT 1 +TTCTTGATAACGCATCTTCTACAT 1 +TGCTTGGACTACATATGGTTGAGGG 1 +CAGATTCACTGATTTTCTTACGCC 1 +TTTGTTTTTCATTTTTTTATCTTT 1 +CTATATTTTCTCTCTTACC 1 +TAACCTTGCAGAACTATACGATTCAAA 1 +TAAGAAACTGAGCTAACGCAATGTACC 1 +TTCTTTTACTACATATTTTTTATTTTTTTA 1 +TATCTATCTTTGATCTTCTTTTCA 1 +TAATAAATTATTAAATAAAAAAAAAA 1 +TTTTTTATCAATTTTCACCATTCAT 1 +TATTTCACTTTATACTTCCTTAA 1 +TAGTTTTAAATATTTCTTTTTTTC 1 +TTCTTTTACTACATATTGTTTATTTTTTTA 1 +GAGAATAAATATTTCAATGGTCTATTG 1 +CGATATTTTCTCCTCTTACCT 1 +CACGACTTTATTCTTTTTATCTCA 1 +TAGTGGACTTTAAAAAAAAAAAAAAAAAA 1 +CATAATATAAACTTATCTT 1 +ATGAAATTCGAACAATACGTC 1 +AACAACTGCAAACATCTACCACA 1 +TAAAAATAATTGTCTTTAATTTCA 1 +CGCAACCAGCAGCAACTCCTAGCAT 1 +ATTATTAATAAATTATTATAA 1 +CATTAATTCATCCATTTAAACTAA 1 +TCTTATTTTAATCTTCCAATTTC 1 +CTAGACAAGATGCTATAAATTTTAAA 1 +TGACCAAAGACAAACAAACAATAAATA 1 +TTTTTATCAATTTTCACCATTC 1 +TAAGTTTTTAATCATTTTTTTT 1 +TAATCAAAAAACTCTTCATTTTTA 1 +TACAAACGGAACTTTCGTCATAA 1 +TTTTCTTTTTTTCATTTTCTCTTTTA 1 +TAGCCTTTACTAGGCTTTTTCTAA 1 +TTAGTATTAATCTTCACTTAA 1 +TAAAATAAACCAAAACCCAAAAAT 1 
"""Print the lines of a text file in sorted order.

This is test-data/sort.py, a new file added by this changeset.

Usage: python sort.py <path>
"""
import sys


def main(argv=None):
    """Read the file named by argv[1] and print its lines sorted.

    argv defaults to sys.argv, so the command-line behaviour is the same
    as running the script directly. Lines are compared as plain strings
    (default ``sorted`` ordering) and printed one per line.

    Raises IndexError when no path argument is supplied, and the usual
    IOError/OSError when the file cannot be opened.
    """
    if argv is None:
        argv = sys.argv
    # The context manager closes the file even if reading fails.
    with open(argv[1], 'r') as handle:
        # Strip only the line terminator: slicing off the last character
        # unconditionally would truncate the final record of a file that
        # does not end with a newline.
        lines = [line.rstrip('\n') for line in handle]
    for line in sorted(lines):
        print(line)


if __name__ == '__main__':
    main()