changeset 9:6bf9de09aa74 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit c8f13ba73552ccf7db7c22859b7fdc6ad121cdf0"
author artbio
date Mon, 11 Apr 2022 00:27:41 +0000
parents 3519c2de7fac
children fd4a60fc3fca
files sr_bowtie_dataset_annotation.xml test-data/unmatched_2.fa test-data/unmatched_3.fa test-data/unmatched_4.fa test-data/unmatched_5.fa test-data/unmatched_fastq.fa
diffstat 6 files changed, 1300 insertions(+), 269 deletions(-) [+]
line wrap: on
line diff
--- a/sr_bowtie_dataset_annotation.xml	Sat Apr 09 22:45:21 2022 +0000
+++ b/sr_bowtie_dataset_annotation.xml	Mon Apr 11 00:27:41 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.6">
+<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.7">
   <description>by iterative alignments with sRbowtie</description>
   <requirements>
         <requirement type="package" version="1.3.1">bowtie</requirement>
@@ -24,7 +24,9 @@
         #elif $input[0].is_of_type('fastq'):
             #set format = "-q"
         #end if
-
+        
+        mkdir unmatched_dir &&
+        
         #for $file in $input:
             #set sample=$file.element_identifier
             bowtie -p \${GALAXY_SLOTS:-4}
@@ -58,12 +60,15 @@
             #end for
             remaining=\$(( \$(wc -l < class_unmatched.fa)/2)) &&
             echo -e "$sample\tNot classified\t\${remaining}\t\${genome_aligned}" >> $output &&
+            cp class_unmatched.fa unmatched_dir/${sample}_unmatched.fasta &&
+            #if $format == '-q':
+                mv unmatched_dir/${sample}_unmatched.fasta unmatched_dir/${sample}_unmatched.fastq &&
+                sed -n '1~4s/^@/>/p;2~4p' unmatched_dir/${sample}_unmatched.fastq > unmatched_dir/${sample}_unmatched.fasta &&
+                rm unmatched_dir/${sample}_unmatched.fastq &&
+            #end if
         #end for
+        ls -la unmatched_dir &&
         Rscript $__tool_directory__/barplot.r --input $output --barplot $barplot
-        #if $format == '-q':
-            && mv class_unmatched.fa class_unmatched.fastq
-            && sed -n '1~4s/^@/>/p;2~4p' class_unmatched.fastq > class_unmatched.fa
-        #end if
         ]]></command>
   <inputs>
     <param name="input" type="data" multiple="True" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/>
@@ -96,7 +101,9 @@
 <!-- End of other bowtie index selections -->
    </inputs>
    <outputs>
-       <data format="fasta" name="unmatched" label="Annotate smRNAs: Unmatched reads" from_work_dir="class_unmatched.fa" />
+       <collection name="unmatched" type="list" format="fasta" label="Annotate smRNAs: Unmatched reads">
+           <discover_datasets pattern="__name_and_ext__" directory="unmatched_dir" />
+       </collection>
        <data format="tabular" name="output" label="Cascade Annotation Analysis">
            <actions>
                <action name="column_names" type="metadata" default="Sample,Reference Index,Number of reads, Total reads" />
@@ -106,6 +113,22 @@
     </outputs>
     <tests>
         <test>
+            <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" />
+            <param name="genomeSource" value="history" />
+            <param name="ownFile" value ="2L-tail.fa" ftype="fasta" />
+            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
+            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
+            <output name="output" ftype="tabular" file="multisample5_output.tab" />
+            <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" />
+            <output_collection name="unmatched" type="list" count="5">
+                <element name="sample5.fa_unmatched" file="unmatched_5.fa" ftype="fasta"/>
+                <element name="sample4.fa_unmatched" file="unmatched_4.fa" ftype="fasta"/>
+                <element name="sample3.fa_unmatched" file="unmatched_3.fa" ftype="fasta"/>
+                <element name="sample2.fa_unmatched" file="unmatched_2.fa" ftype="fasta"/>
+                <element name="sample1.fa_unmatched" file="unmatched_1.fa" ftype="fasta"/>
+            </output_collection>
+        </test>
+        <test>
             <param name="input" value ="sample1.fa" ftype="fasta" />
             <param name="genomeSource" value="history" />
             <param name="ownFile" value ="2L-tail.fa" ftype="fasta" />
@@ -113,7 +136,9 @@
             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
             <output name="output" ftype="tabular" file="sample1_output.tab" />
             <output name="barplot" ftype="pdf" file="sample1_output.pdf" compare="sim_size" delta="500"/>
-            <output name="unmatched" ftype="fasta" file="unmatched_1.fa" />
+            <output_collection name="unmatched" type="list">
+                <element name="sample1.fa_unmatched" file="unmatched_1.fa" ftype="fasta"/>
+            </output_collection>
         </test>
         <test>
             <param name="input" value ="sample.fastq" ftype="fastq" />
@@ -123,17 +148,9 @@
             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
             <output name="output" ftype="tabular" file="sample_output.tab" />
             <output name="barplot" ftype="pdf" file="sample_output.pdf" compare="sim_size" delta="500"/>
-            <output name="unmatched" ftype="fasta" file="unmatched_2.fa" />
-        </test>
-        <test>
-            <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" />
-            <param name="genomeSource" value="history" />
-            <param name="ownFile" value ="2L-tail.fa" ftype="fasta" />
-            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
-            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
-            <output name="output" ftype="tabular" file="multisample5_output.tab" />
-            <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" />
-            <output name="unmatched" ftype="fasta" file="unmatched_3.fa" />
+            <output_collection name="unmatched" type="list">
+                <element name="sample.fastq_unmatched" file="unmatched_fastq.fa" ftype="fasta"/>
+            </output_collection>
         </test>
     </tests>
   <help>
--- a/test-data/unmatched_2.fa	Sat Apr 09 22:45:21 2022 +0000
+++ b/test-data/unmatched_2.fa	Mon Apr 11 00:27:41 2022 +0000
@@ -1,78 +1,444 @@
->HWI-176
-CGAGACTTTGAACTAAAAACAGT
->HWI-277
+>30787
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>30836
+TAAATATTTTTTTTGAAACA
+>30977
+CGACTGTAATTATTAGCACAATACT
+>30985
+GACGATATTGCTGCAATAGACCTTGA
+>31000
 TCAGATGAGAGACAAATTAGAAT
->HWI-458
-TTTCGAGGTTCCGAATTTTCTGTC
->HWI-778
-TAGGATGTGCTCTGCGGTTTCCACT
->HWI-895
-TTTAATTGGCGCAGTCGGTAGGATC
->HWI-967
-GAGAATGACGGAACTGTAATA
->HWI-974
-TTGCGAGGTCGTCCTGGGAGACCAG
->HWI-1009
-AACGTTGGAAGAAGCTCTGCAGC
->HWI-1011
-TTTAATGTTAACACGGACATTGACC
->HWI-1014
-TCTCCGACTTGCTGAGCTGTTTCCGCCG
->HWI-1106
-CATCTTGTTATTCTATTGTCTTTGGTC
->HWI-1202
-AAAGTAAATTCTGATGACTTCAAAAT
->HWI-1227
-TACCATGTAAATTCGTTTCTTCG
->HWI-1262
-TATTAGCTCAAAGAACAGCTCGT
->HWI-1297
-TTGCAGCAATATCGTCAACATCCTC
->HWI-1520
-TAATATCGAAGCCGAACTGAGAACA
->HWI-1584
-TAAAGTTATGACAAGAATTGATGTT
->HWI-1590
-TCATATGCCAATTTCGTGTTTCGATG
->HWI-1609
+>31256
+TAATTCGGAATGCCTGCTCTACT
+>31417
+TCAATGATCGCTGTGCTCAGTAGGA
+>31506
+TTTGTCTGACGTTAAAAAATATA
+>31567
+TCCACATTAGGAGGATTATTAGACAAC
+>31790
+AAACATAATAATTGATGGCGGAAGA
+>31872
+AAGGTAATCATAGAGCACCACGGTT
+>32157
+TTTCTGTGAATTCACATGCTGATGA
+>32192
+TTTCTCATGTATAAAATGCTCTGATGG
+>32223
+TATCTTGTTATTCTAGTGTCTTTGGTT
+>32338
+TGTGGGACTCGAGCCAAAATGGCAACCT
+>32497
+TGCGGTTGGACAATTTTTTTTTTATA
+>32506
+TTGTTGTTTGGAGGAAGTTCCTTT
+>32510
+TCTTCCGCCATCAATTATTATGTTTT
+>32522
+TACTTGACTTTTCTATAGAATCTGGT
+>32540
+TATCGTCAACATCCTCGAACGATCGAGA
+>32626
+TTGATCAAGGTAGGGTTGTCGC
+>32646
+TCTGTTAAACACCCCTGAATCGTGGAT
+>32657
+TTTGGACATTTTGCAGGTGATACAAT
+>32682
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>32716
+TATCTATAGTTCCGATTGGCCATCTC
+>32885
+GAAAGTGGGTATCTGTATTTTAGGC
+>32967
+TAAAGATACCATCTAACCTCCTTGGA
+>33075
+ACAGATATTAGTGTTTTTCAAGCAGC
+>33147
+ATCGCAATTATGGCATAACAGATTCGGA
+>33163
+TTCAGCGTCGCTTGATTGAATAGAT
+>33231
+TAAGAACTTCTGAGGTGAAGGGC
+>33242
+TTGGGTTTAGAAATTAAAATTAAGGC
+>33244
+TATAGAAAGTTACGAATATATTAGGA
+>33248
+TTTTTGATCAATTGGCACCGTGCGAA
+>33296
+TGCAGAGCTTCTTCCAACGTTGGCAAG
+>33318
+TAGATGTCTGCAGGAATAACGGA
+>33338
+TCGACTATTAATGGCTGTTAGAATT
+>33475
+CAAACTTATCGACCATCTCCTCAAACG
+>33601
+TTACCCTTTTTCCGGAGCGTTTGTGC
+>33611
+AATGATCGCTGTGCTCAGTATGACGG
+>33662
+TTTGATTCATTACAATTTACGCTGAA
+>33737
+CAGATATTAGTGTTTTTCAAGCAGC
+>33747
+TACATGTAAAGCAGCTGTGTGTGC
+>33815
+AGTTTTTGGAATCACTTGA
+>34032
+TGCGGACGTGTGCTCGCTGCGTGA
+>34200
+TAATATATGTATAATCTGCTTGGTG
+>34240
+TCTTTACGCCATATAAATCATTTCGA
+>34259
+ACAAATCATAAATTTGATGGGACGA
+>34268
+TCTTGTGGACTTCACTCTAG
+>34297
+TAAGTAAATAGTCCCCGCCTTATTGAGG
+>34337
+TGGATAATGAATGTTTTTGATTTGC
+>34569
+TCTAGGTTCTTCTGAAATCGTGGGA
+>34643
+TCGTGTAGACCGGATAAGATTTTTT
+>34697
+TCTAATACTGTGAAAGGGTGGGG
+>34808
+GTAGGATGTGCTCTGCGGTTTCCAC
+>34892
+TCTGTCGCAGTTGTAGCTTGCAATA
+>34896
+TGTAGTTGCCACTTATGCTGTCCA
+>35151
+TAGCAATGTCCGTCTGTCCGTATGA
+>35196
+TGTAAATGGTCAGCGAAAGCAAAGG
+>35211
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>35246
+CCTCAGAGAACGTCAGACCGCG
+>35272
+TGCAGAGCTTCTTCCAACGTTGG
+>35273
+TCTGGTAGTAAGAAAAATGTAGCTT
+>35286
+TATGTATGGATATGTATATTTATGGGT
+>35317
+TTTTGGTTTGATCGTCAGGTGGTC
+>35512
+GACGATAATAGTGAATTTTGGACA
+>35566
+TTCGAATTCGCGCCATTTCACAATC
+>35948
+TGGCCTGTATACGCTTTCTGTTG
+>36009
+TTGCAAAAGTCATATCTTGAGG
+>36059
+TAAGTTACTATGGATCCATAAGGGTA
+>36100
+TGATATGGGACTTGTAGCTTTTTTAAA
+>36121
+TGTCGTGGGCTGTGCGTTTGAGAA
+>36235
+TGCTACCTTTAGCTGCAAGATTAACT
+>36238
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>36523
+TGCGAGTCCGAGCTTTGAACGTGGG
+>36607
+TCTCAATGTAATGTCTTCTTTTTGGA
+>36624
+CGTTCTTTAAAACCACCAATGGGA
+>36776
+TACTCACGACCATTGTCATTTCTCAAG
+>36825
+CGTTCGAGGTCCACTTTCTTAGCGGA
+>36850
+TCTGATGTCGGCGAGAAAGGAGTCTCA
+>36872
+TCAAGGCCAGCACACAGTAACATGGT
+>36968
+ACATTGGAAATACCGCGGGACCGC
+>36994
+TATACGTTTTTTGACCTCTTCTCTTTGA
+>37110
+TGATAATGTAAACAAAGATAAAGGG
+>37210
+TCAGATGAAAGACAAATTAGAATTAAA
+>37224
+GACGATAATAGTGAATTTTGGACA
+>37226
+TCTGAAGTTGCCGCACTAGAGATGG
+>37339
+TATACAGTCCACTATATCGTTGTTTAA
+>37395
+TACAATTTACGCTGAATTTAAATGAA
+>37460
+CACAAAGTAACGTGCACCACCATTT
+>37766
+TCTGTGCAACTTTGTACGCGAAGTCAGC
+>37836
+TCAAGGATTAATGTAGGGGGGGGG
+>38096
+TTTGGAGGTAGAAGTCTTAGTGGCCGC
+>38100
+GACAAAATATGCCCTTCAATTTAGA
+>38113
+TATGGTCCAGAATGTAGCCTCGGC
+>38213
+TAACTGTTTTTAGTTCAAAGTCTCGAA
+>38224
+TTTCAGTTTTTATTGTTAGTCACAGG
+>38263
+TAACTGCGAAGTCGATCAGGTCCGA
+>38266
+ATCCGGACGATTGACGAGGAGCCCATT
+>38271
+TTATGTCAGTGTCGAAGGCGATCGAA
+>38373
+TCTATAGCCTTGGCGTAGGAACTCGCA
+>38384
+GCTACAACTGCGACAGAAAATTCGGA
+>38468
+TATTATCGTCCGCTGCTAAACTGC
+>38491
+TGCATACTTCCGTTCTCTTTTCGGGA
+>38673
+TTGTGTATTGTGATTCTGATTCGTG
+>38745
+TCCCTGGCCCGCTAGACAGCAGGA
+>38784
+TGTGCTAATAATTAAAGTCGACTGA
+>38873
+TGTCATACTTTCGTCAAAA
+>39154
+TAATAGTCAGGGCGCGAATTTTTAAAA
+>39325
+TTAGGACTTATTGAACTTTACGGTA
+>39359
+TATGCAAATCAAGTGTGACCGTAGCT
+>39450
+TTAAACAACGATATAGTGGACAGTA
+>39453
+TCTGCCGGATTTTGATCCAATCAAGG
+>39819
+GTGGAATTGAAAAAGAACCAGACACA
+>40040
+TTTTTGGAACTACCTGAGTCGGTT
+>40183
+TAACACAAAGCAGTATGATTTAATAAT
+>40189
+CAGCAAGCTGAGATGTACATTAGTATA
+>40374
+TACGTTTTCTTGCAGATCAAAAA
+>40445
+CGTTCTTTAAAACCACCAATGGGA
+>40467
+TACGCAGATTCCTGGGAGTTACAGGA
+>40488
+TGATTTGGGCTTGCATACTTGTACT
+>40797
+TATATTCGTGTTCATGTGTGAACAGC
+>40831
+TCTAAGAACTTCTGAGGTGAAGGGC
+>40951
+TTCTTCGTAAGTCAAAATAGTGTCGCC
+>40988
+TATGATTGATTGCTTGAGAGT
+>40998
+TGATAGAGCTGCATTTGAATTAACGG
+>41061
+CTCTTTCCGCTCACTCCCGCTGAGA
+>41076
+TGCATTAAGAAGATTTAGGATCC
+>41140
+TCAAGGATTAATGTAGGGGGGG
+>41143
+CAATAGCGTCGCTGAGTAACAGTG
+>41162
+TACGGATTGCAGCGGCTAG
+>41183
+TTTTTTGGCACACGATTTTTTGGACGT
+>41227
+TAATATATGTATACTCTGCTTGGTG
+>41265
+TTTATGATTTTTGGTAATA
+>41365
+TAGGAGGGTTCCACAACTATTTCGGGG
+>41837
+TAGTTTGACACTGTTTGGAGACGTGG
+>42053
+TCTGTGGTCGAATCGAAGGAGTGC
+>42113
+TAATAGATCGCTCACCTGTTCCTGG
+>42392
+CGGCATCGGAAAACTCCCAGCGGGGC
+>42504
+TAACGTTATTATTATTTGAAAATAGAA
+>42566
+TTTGTTGGGTAGGAACTTTACTGC
+>42668
+TGCTGCAGAGCTTCTTCCAACGTTGG
+>42751
+CCAAAGTCTGGTTGTCAGAAAATGTGC
+>42777
+AAGGAAGGAACCAAAGAAGCACAAACG
+>42804
+TAATTCTAATTTGTCTCTCATCTGA
+>42830
+TAGTATACTTATTAAGTCATTTGA
+>42857
+AGAGTATTCATCTTGAGGCGTGTG
+>42886
+TGAACAACGATTTATGTATATAAGAA
+>42899
+TACGATAATAGTGAATTTTGGACA
+>42936
+TTTGACTAAAAGTCGCTTGTTTTGGA
+>43050
+AATACATAACTCTGGACACAGGAGA
+>43219
+TACTTTCGTCAAAATGTTCAGGAGCT
+>43242
+TGACATGTCTATTTCCATGGGTTCGGA
+>43268
+GTGTATAAAAAAATTTATTGTTGAGCA
+>43299
+TCTGGATGATGGCTGATGCTCGTTG
+>43491
+TCTGATGACAATGAATTTTTTAGACA
+>43520
+TATTGTTGTAATTGCTGCCTCGGTTG
+>43654
+ATATGAACAAAGCAAAGACACTAGAA
+>43677
+CACGATTCATAACCCTCAGCTGAAG
+>43776
+GCTCCTGGCAACTCTGTGATGGAC
+>43903
+TATTTTGATGTTTAATGAA
+>43990
+TTTCTACTTCGTATTATTTTTATGA
+>44062
+AAACATAATAATTGATGGCGGAAGA
+>44071
+CACAGACGCAGTGGAAACCGCAGA
+>44078
+CGTGCGTCCGAGCAAAAGGTGGT
+>44187
+TTATGTAAGAATATTTGTCATTAGA
+>44239
+TATCAATGTTGACCGTAATACTCAA
+>44253
+TGGATATTGAATGTTTTTGATTTGC
+>44279
+TGTGATTTTCCCAATTTATATTAATACA
+>44374
+TAGTCGGAGTTGATGAGCTGCC
+>44412
+TCCGAATTTTCTGTCGCAGTTGTAGCT
+>44423
+TCGGCTTGGGTTTAGAAATTAAAAT
+>44558
+ACAAATCATAAATTTGGTGGGA
+>44637
+TTTTTTATCAATTGGCACCATGCGAA
+>44642
+TACTGTGTGCTGGCCTTGATGAAAGT
+>44688
+TAGATGTCTGCACGAATAACGGA
+>44945
+TGCTTCCGAGCAATCTACGTTGGTAAAA
+>45054
+TCTCATCTGACAATTTTTTAAAAGCGA
+>45088
+TCTGAAGCAGCGCTCACGGCAGAATGC
+>45203
+TGCCGGATTTTGATCCAATCAAGGGA
+>45263
+TATTGATTTTCCTATTTAGTTGAACA
+>45274
+AAAAGTCTGGATATTGTAGGATAGGA
+>45358
+CAACGCTGGACCTTGGACTCGAGGGC
+>45396
+CATCTTGTTATTCTAGTGTCTTTGGTA
+>45641
+GAACTAAGATCAGTCGACTGTAATTAT
+>45774
+CGACGAACTAGCAGCTCTGGTGT
+>45911
+CAGATATTAGTGTTTTTCAAGCAGC
+>45997
+TTAACCAGTCGGCGTTGTTTAAGTAGC
+>46031
+AAAGCGTCTACTTGAACAATGAGA
+>46054
+TTAGATCGTATTACTTGGGTGCTGG
+>46199
+TAAGCGTTAGGTCGTATTACTTGGGC
+>46255
 TAAGATTGAAAATTACTGTGGAGTAAT
->HWI-1745
-TTCATCCTGCTGCCGGAGC
->HWI-1816
-CTTCCGTTATTCCTGCAGACA
->HWI-1845
-TAATATCGAACCCGAACTGAGAA
->HWI-1852
-TGGACACCGTCGTTCCACTTGAACT
->HWI-1887
-TCAAGGTAGGGTTGTCGCGTGTATTT
->HWI-1892
-TATTATCGTCCGCTGCTAAACTG
->HWI-1936
-TAGCTGCAACTTCTGAAGTTATGGC
->HWI-1997
-CAAAACAAGAATTTTTCGCATGGTGCC
->HWI-2055
-TAAACTAATTCTGTCGGTTTTCTGT
->HWI-2075
-AGACTTTGAACTAAAAACAGTTACCT
->HWI-2244
-TAACTTCTTCTATTTTCGTGCGGGA
->HWI-2253
-TAACTGCCCTCGATCACGCCTTCCCGA
->HWI-2277
-TGACGACGACAGTATCGCAAGACGGT
->HWI-2409
-GTCAGATGAGAGACAAATTAGAAT
->HWI-2418
-TAATATCGAACCCGAACTGAGAAC
->HWI-2490
-TTTATGACAACCCGATCGGACCTCACTC
->HWI-2557
-TGTCCGGAGATCAAGAAGTGTTGGCAC
->HWI-2670
-TCAGATGAGAGACAAATTAGAATTAAA
->HWI-2719
-TATTATCGTCCGCTGCTAAACTGCTG
->HWI-2834
-CCTAGGAGAAGGAAGTCAAGAAGGCC
+>46366
+TAAGGAATTGTCGGCCATTTAATGTGA
+>46387
+CAGAGCTTCTTCCAACGTTGGCAAG
+>46701
+TTGTATCTTTTTGCTTTTTATATT
+>46705
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>46783
+TCTCATCTGACAATTTTTTAAAAGCGA
+>46836
+TCATAAGGACAGACGGACAT
+>46892
+AAACATAATAATTGATGGCGGAAGA
+>47244
+TCTCTGTCCGCTCGCTTACGATGAGA
+>47290
+TTTCAGGAATGGGGTCGTCCCACTA
+>47348
+TGCAAAAGTAATATCAAAGACACTA
+>47359
+TTTAAAATTATAATAGTCAGGG
+>47427
+CATCTTGTTATTCTAGTGTCTTTGGT
+>47654
+TTAAACACTGAATTCGGTTTCGAAA
+>47656
+TTTCTGAAGAATCCTGTAACTCCC
+>47785
+TGTAGATGAGCGGCAAATGTGG
+>47827
+TTGATCAAGGTAGAGTTGTCGCGC
+>48184
+TTCAAGGATTAATGTAGGGGGGG
+>48576
+TTAACCCGGAGACTTGGGTGTGGGT
+>48764
+TGCAAAAGTAATATCAAAGACAATAGA
+>48782
+TGACAATGTAGTGAACGCCAGTGT
+>48893
+TATAAATGCCGTCTGATATTATTAAA
+>49017
+ACCGGATGTCATATCCAGCGTCGTGAA
+>49381
+TACAATGTAAATTCGTTTCTTCGATCA
+>49456
+TTTTGGTTTGATCGTCAGGTGGACGC
+>49484
+TCTGCTATCATTGACTCGATCATTGA
+>49569
+TCGATTGTATGATCAGTGGAAGTGGC
+>49602
+GAAATTGGCCAACATTAATTCGGAA
+>49628
+TACTTTCGTCAAAATGTTCAGGAGC
+>49715
+TTTGTCCGGGTGCTTCGAAAGAACTCT
+>49778
+CAATAGCGTCGCTGAGTAACAGTG
--- a/test-data/unmatched_3.fa	Sat Apr 09 22:45:21 2022 +0000
+++ b/test-data/unmatched_3.fa	Mon Apr 11 00:27:41 2022 +0000
@@ -1,177 +1,3 @@
->25207
-TCGCACTAGCTTCGGTCGTCTCGGA
->25229
-TCTGTTCGAGGTTCTTTATTTGAA
->25283
-TAAGGGAGCGAGATCGTTCGATGCA
->25363
-TAGTGTTGGGTGTAGGCCAGGGAG
->25376
-TCCGCCGACAATGTTGCAACATGA
->25435
-TTTCTTCAAGCTGCGCGTTTTTCGG
->25593
-TCGAAAACTTCTTCGCACTCTTCGTT
->25660
-TCTCGCTGCGTGAACGATGAAGGC
->25968
-TTTGCTAGGACTGCTGCATAAG
->26220
-GCCCGTGTCGATGAATGCTTCAAACT
->26261
-TATTATCGTCCGCTGCTAAACTGC
->26543
-CTTATGTATAAAAAGCTCTGATGGA
->26610
-TCCTTAAGTTCGTTGATTTGGGCT
->26675
-CAATATCGTCAACATCCTCGAACGAT
->26873
-TTGGGAGCTGAATCCCGTTACGGTA
->26978
-CGCGCATTAGCTCAAATCTAGTTGGA
->27220
-TGTTTATTTGTCAAGTTTAGATAATA
->27397
-TAGTTCACAGCTGTATGTCCAGATGGGT
->27869
-TATGGTCCAGAATGTAGCCTCGGC
->27878
-TATTAATCGGGCCACAAATATCGGTA
->28035
-GGTATTCTTTGCGAGGTCGTCCTGG
->28154
-AAAGACGAGAACGCGTATATGTGTGC
->28319
-TGACCAACCTTAAAAGATCGGGGT
->28387
-TATTACTATTTCTAAGCTTTGTTTAAA
->28594
-TTTTGAGGTTGGTCAAGAAGTTGTT
->28608
-TAAGATTGAAAATTACTGTGGAGT
->28668
-TACAGATTCTAGAGACAAAGACGC
->28674
-TAAGTACATGCGCCCAGCCGCCGTGA
->28836
-TCTGGTTAAGGTCGGAATACTCGTCT
->29056
-TGCTTTACATACCCTTTGGTGCCC
->29323
-TTTTGCTGCAGAGCTTCTTCCAACGT
->29342
-TATGTATGGATATGTATATTTATGGT
->29387
-TGATAATCGACCTCTTCCATCGTTGT
->29423
-TCTAAGAACTTCTGAGGTGAAGG
->29462
-TGCATTTCAATCGGAAGAGTACTCTG
->29492
-TAAAACACAAATCTCGACATACAGA
->29703
-GTACAGGTTCTGATGACAATG
->29785
-TCAGATGAAAGACAAATTAGAATT
->30053
-TAAGAACTTCTGAGATGAAGGGC
->30096
-TGGATATTGAATGTTTTTGATTTGC
->30136
-TGTAGTCGTCGTATGTCCGGA
->30272
-TGCATTCGTGGATTCGCATTCGAGA
->30497
-TGACAAAATATGCCCTTCACCTCAGA
->30787
-TAACTGTTTTTAGTTCAAAGTCTCGGA
->30836
-TAAATATTTTTTTTGAAACA
->30977
-CGACTGTAATTATTAGCACAATACT
->30985
-GACGATATTGCTGCAATAGACCTTGA
->31000
-TCAGATGAGAGACAAATTAGAAT
->31256
-TAATTCGGAATGCCTGCTCTACT
->31417
-TCAATGATCGCTGTGCTCAGTAGGA
->31506
-TTTGTCTGACGTTAAAAAATATA
->31567
-TCCACATTAGGAGGATTATTAGACAAC
->31790
-AAACATAATAATTGATGGCGGAAGA
->31872
-AAGGTAATCATAGAGCACCACGGTT
->32157
-TTTCTGTGAATTCACATGCTGATGA
->32192
-TTTCTCATGTATAAAATGCTCTGATGG
->32223
-TATCTTGTTATTCTAGTGTCTTTGGTT
->32338
-TGTGGGACTCGAGCCAAAATGGCAACCT
->32497
-TGCGGTTGGACAATTTTTTTTTTATA
->32506
-TTGTTGTTTGGAGGAAGTTCCTTT
->32510
-TCTTCCGCCATCAATTATTATGTTTT
->32522
-TACTTGACTTTTCTATAGAATCTGGT
->32540
-TATCGTCAACATCCTCGAACGATCGAGA
->32626
-TTGATCAAGGTAGGGTTGTCGC
->32646
-TCTGTTAAACACCCCTGAATCGTGGAT
->32657
-TTTGGACATTTTGCAGGTGATACAAT
->32682
-TAACTGTTTTTAGTTCAAAGTCTCGGA
->32716
-TATCTATAGTTCCGATTGGCCATCTC
->32885
-GAAAGTGGGTATCTGTATTTTAGGC
->32967
-TAAAGATACCATCTAACCTCCTTGGA
->33075
-ACAGATATTAGTGTTTTTCAAGCAGC
->33147
-ATCGCAATTATGGCATAACAGATTCGGA
->33163
-TTCAGCGTCGCTTGATTGAATAGAT
->33231
-TAAGAACTTCTGAGGTGAAGGGC
->33242
-TTGGGTTTAGAAATTAAAATTAAGGC
->33244
-TATAGAAAGTTACGAATATATTAGGA
->33248
-TTTTTGATCAATTGGCACCGTGCGAA
->33296
-TGCAGAGCTTCTTCCAACGTTGGCAAG
->33318
-TAGATGTCTGCAGGAATAACGGA
->33338
-TCGACTATTAATGGCTGTTAGAATT
->33475
-CAAACTTATCGACCATCTCCTCAAACG
->33601
-TTACCCTTTTTCCGGAGCGTTTGTGC
->33611
-AATGATCGCTGTGCTCAGTATGACGG
->33662
-TTTGATTCATTACAATTTACGCTGAA
->33737
-CAGATATTAGTGTTTTTCAAGCAGC
->33747
-TACATGTAAAGCAGCTGTGTGTGC
->33815
-AGTTTTTGGAATCACTTGA
 >34032
 TGCGGACGTGTGCTCGCTGCGTGA
 >34200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_4.fa	Mon Apr 11 00:27:41 2022 +0000
@@ -0,0 +1,332 @@
+>30787
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>30836
+TAAATATTTTTTTTGAAACA
+>30977
+CGACTGTAATTATTAGCACAATACT
+>30985
+GACGATATTGCTGCAATAGACCTTGA
+>31000
+TCAGATGAGAGACAAATTAGAAT
+>31256
+TAATTCGGAATGCCTGCTCTACT
+>31417
+TCAATGATCGCTGTGCTCAGTAGGA
+>31506
+TTTGTCTGACGTTAAAAAATATA
+>31567
+TCCACATTAGGAGGATTATTAGACAAC
+>31790
+AAACATAATAATTGATGGCGGAAGA
+>31872
+AAGGTAATCATAGAGCACCACGGTT
+>32157
+TTTCTGTGAATTCACATGCTGATGA
+>32192
+TTTCTCATGTATAAAATGCTCTGATGG
+>32223
+TATCTTGTTATTCTAGTGTCTTTGGTT
+>32338
+TGTGGGACTCGAGCCAAAATGGCAACCT
+>32497
+TGCGGTTGGACAATTTTTTTTTTATA
+>32506
+TTGTTGTTTGGAGGAAGTTCCTTT
+>32510
+TCTTCCGCCATCAATTATTATGTTTT
+>32522
+TACTTGACTTTTCTATAGAATCTGGT
+>32540
+TATCGTCAACATCCTCGAACGATCGAGA
+>32626
+TTGATCAAGGTAGGGTTGTCGC
+>32646
+TCTGTTAAACACCCCTGAATCGTGGAT
+>32657
+TTTGGACATTTTGCAGGTGATACAAT
+>32682
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>32716
+TATCTATAGTTCCGATTGGCCATCTC
+>32885
+GAAAGTGGGTATCTGTATTTTAGGC
+>32967
+TAAAGATACCATCTAACCTCCTTGGA
+>33075
+ACAGATATTAGTGTTTTTCAAGCAGC
+>33147
+ATCGCAATTATGGCATAACAGATTCGGA
+>33163
+TTCAGCGTCGCTTGATTGAATAGAT
+>33231
+TAAGAACTTCTGAGGTGAAGGGC
+>33242
+TTGGGTTTAGAAATTAAAATTAAGGC
+>33244
+TATAGAAAGTTACGAATATATTAGGA
+>33248
+TTTTTGATCAATTGGCACCGTGCGAA
+>33296
+TGCAGAGCTTCTTCCAACGTTGGCAAG
+>33318
+TAGATGTCTGCAGGAATAACGGA
+>33338
+TCGACTATTAATGGCTGTTAGAATT
+>33475
+CAAACTTATCGACCATCTCCTCAAACG
+>33601
+TTACCCTTTTTCCGGAGCGTTTGTGC
+>33611
+AATGATCGCTGTGCTCAGTATGACGG
+>33662
+TTTGATTCATTACAATTTACGCTGAA
+>33737
+CAGATATTAGTGTTTTTCAAGCAGC
+>33747
+TACATGTAAAGCAGCTGTGTGTGC
+>33815
+AGTTTTTGGAATCACTTGA
+>34032
+TGCGGACGTGTGCTCGCTGCGTGA
+>34200
+TAATATATGTATAATCTGCTTGGTG
+>34240
+TCTTTACGCCATATAAATCATTTCGA
+>34259
+ACAAATCATAAATTTGATGGGACGA
+>34268
+TCTTGTGGACTTCACTCTAG
+>34297
+TAAGTAAATAGTCCCCGCCTTATTGAGG
+>34337
+TGGATAATGAATGTTTTTGATTTGC
+>34569
+TCTAGGTTCTTCTGAAATCGTGGGA
+>34643
+TCGTGTAGACCGGATAAGATTTTTT
+>34697
+TCTAATACTGTGAAAGGGTGGGG
+>34808
+GTAGGATGTGCTCTGCGGTTTCCAC
+>34892
+TCTGTCGCAGTTGTAGCTTGCAATA
+>34896
+TGTAGTTGCCACTTATGCTGTCCA
+>35151
+TAGCAATGTCCGTCTGTCCGTATGA
+>35196
+TGTAAATGGTCAGCGAAAGCAAAGG
+>35211
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>35246
+CCTCAGAGAACGTCAGACCGCG
+>35272
+TGCAGAGCTTCTTCCAACGTTGG
+>35273
+TCTGGTAGTAAGAAAAATGTAGCTT
+>35286
+TATGTATGGATATGTATATTTATGGGT
+>35317
+TTTTGGTTTGATCGTCAGGTGGTC
+>35512
+GACGATAATAGTGAATTTTGGACA
+>35566
+TTCGAATTCGCGCCATTTCACAATC
+>35948
+TGGCCTGTATACGCTTTCTGTTG
+>36009
+TTGCAAAAGTCATATCTTGAGG
+>36059
+TAAGTTACTATGGATCCATAAGGGTA
+>36100
+TGATATGGGACTTGTAGCTTTTTTAAA
+>36121
+TGTCGTGGGCTGTGCGTTTGAGAA
+>36235
+TGCTACCTTTAGCTGCAAGATTAACT
+>36238
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>36523
+TGCGAGTCCGAGCTTTGAACGTGGG
+>36607
+TCTCAATGTAATGTCTTCTTTTTGGA
+>36624
+CGTTCTTTAAAACCACCAATGGGA
+>36776
+TACTCACGACCATTGTCATTTCTCAAG
+>36825
+CGTTCGAGGTCCACTTTCTTAGCGGA
+>36850
+TCTGATGTCGGCGAGAAAGGAGTCTCA
+>36872
+TCAAGGCCAGCACACAGTAACATGGT
+>36968
+ACATTGGAAATACCGCGGGACCGC
+>36994
+TATACGTTTTTTGACCTCTTCTCTTTGA
+>37110
+TGATAATGTAAACAAAGATAAAGGG
+>37210
+TCAGATGAAAGACAAATTAGAATTAAA
+>37224
+GACGATAATAGTGAATTTTGGACA
+>37226
+TCTGAAGTTGCCGCACTAGAGATGG
+>37339
+TATACAGTCCACTATATCGTTGTTTAA
+>37395
+TACAATTTACGCTGAATTTAAATGAA
+>37460
+CACAAAGTAACGTGCACCACCATTT
+>37766
+TCTGTGCAACTTTGTACGCGAAGTCAGC
+>37836
+TCAAGGATTAATGTAGGGGGGGGG
+>38096
+TTTGGAGGTAGAAGTCTTAGTGGCCGC
+>38100
+GACAAAATATGCCCTTCAATTTAGA
+>38113
+TATGGTCCAGAATGTAGCCTCGGC
+>38213
+TAACTGTTTTTAGTTCAAAGTCTCGAA
+>38224
+TTTCAGTTTTTATTGTTAGTCACAGG
+>38263
+TAACTGCGAAGTCGATCAGGTCCGA
+>38266
+ATCCGGACGATTGACGAGGAGCCCATT
+>38271
+TTATGTCAGTGTCGAAGGCGATCGAA
+>38373
+TCTATAGCCTTGGCGTAGGAACTCGCA
+>38384
+GCTACAACTGCGACAGAAAATTCGGA
+>38468
+TATTATCGTCCGCTGCTAAACTGC
+>38491
+TGCATACTTCCGTTCTCTTTTCGGGA
+>38673
+TTGTGTATTGTGATTCTGATTCGTG
+>38745
+TCCCTGGCCCGCTAGACAGCAGGA
+>38784
+TGTGCTAATAATTAAAGTCGACTGA
+>38873
+TGTCATACTTTCGTCAAAA
+>39154
+TAATAGTCAGGGCGCGAATTTTTAAAA
+>39325
+TTAGGACTTATTGAACTTTACGGTA
+>39359
+TATGCAAATCAAGTGTGACCGTAGCT
+>39450
+TTAAACAACGATATAGTGGACAGTA
+>39453
+TCTGCCGGATTTTGATCCAATCAAGG
+>39819
+GTGGAATTGAAAAAGAACCAGACACA
+>40040
+TTTTTGGAACTACCTGAGTCGGTT
+>40183
+TAACACAAAGCAGTATGATTTAATAAT
+>40189
+CAGCAAGCTGAGATGTACATTAGTATA
+>40374
+TACGTTTTCTTGCAGATCAAAAA
+>40445
+CGTTCTTTAAAACCACCAATGGGA
+>40467
+TACGCAGATTCCTGGGAGTTACAGGA
+>40488
+TGATTTGGGCTTGCATACTTGTACT
+>40797
+TATATTCGTGTTCATGTGTGAACAGC
+>40831
+TCTAAGAACTTCTGAGGTGAAGGGC
+>40951
+TTCTTCGTAAGTCAAAATAGTGTCGCC
+>40988
+TATGATTGATTGCTTGAGAGT
+>40998
+TGATAGAGCTGCATTTGAATTAACGG
+>41061
+CTCTTTCCGCTCACTCCCGCTGAGA
+>41076
+TGCATTAAGAAGATTTAGGATCC
+>41140
+TCAAGGATTAATGTAGGGGGGG
+>41143
+CAATAGCGTCGCTGAGTAACAGTG
+>41162
+TACGGATTGCAGCGGCTAG
+>41183
+TTTTTTGGCACACGATTTTTTGGACGT
+>41227
+TAATATATGTATACTCTGCTTGGTG
+>41265
+TTTATGATTTTTGGTAATA
+>41365
+TAGGAGGGTTCCACAACTATTTCGGGG
+>41837
+TAGTTTGACACTGTTTGGAGACGTGG
+>42053
+TCTGTGGTCGAATCGAAGGAGTGC
+>42113
+TAATAGATCGCTCACCTGTTCCTGG
+>42392
+CGGCATCGGAAAACTCCCAGCGGGGC
+>42504
+TAACGTTATTATTATTTGAAAATAGAA
+>42566
+TTTGTTGGGTAGGAACTTTACTGC
+>42668
+TGCTGCAGAGCTTCTTCCAACGTTGG
+>42751
+CCAAAGTCTGGTTGTCAGAAAATGTGC
+>42777
+AAGGAAGGAACCAAAGAAGCACAAACG
+>42804
+TAATTCTAATTTGTCTCTCATCTGA
+>42830
+TAGTATACTTATTAAGTCATTTGA
+>42857
+AGAGTATTCATCTTGAGGCGTGTG
+>42886
+TGAACAACGATTTATGTATATAAGAA
+>42899
+TACGATAATAGTGAATTTTGGACA
+>42936
+TTTGACTAAAAGTCGCTTGTTTTGGA
+>43050
+AATACATAACTCTGGACACAGGAGA
+>43219
+TACTTTCGTCAAAATGTTCAGGAGCT
+>43242
+TGACATGTCTATTTCCATGGGTTCGGA
+>43268
+GTGTATAAAAAAATTTATTGTTGAGCA
+>43299
+TCTGGATGATGGCTGATGCTCGTTG
+>43491
+TCTGATGACAATGAATTTTTTAGACA
+>43520
+TATTGTTGTAATTGCTGCCTCGGTTG
+>43654
+ATATGAACAAAGCAAAGACACTAGAA
+>43677
+CACGATTCATAACCCTCAGCTGAAG
+>43776
+GCTCCTGGCAACTCTGTGATGGAC
+>43903
+TATTTTGATGTTTAATGAA
+>43990
+TTTCTACTTCGTATTATTTTTATGA
+>44062
+AAACATAATAATTGATGGCGGAAGA
+>44071
+CACAGACGCAGTGGAAACCGCAGA
+>44078
+CGTGCGTCCGAGCAAAAGGTGGT
+>44187
+TTATGTAAGAATATTTGTCATTAGA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_5.fa	Mon Apr 11 00:27:41 2022 +0000
@@ -0,0 +1,412 @@
+>30787
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>30836
+TAAATATTTTTTTTGAAACA
+>30977
+CGACTGTAATTATTAGCACAATACT
+>30985
+GACGATATTGCTGCAATAGACCTTGA
+>31000
+TCAGATGAGAGACAAATTAGAAT
+>31256
+TAATTCGGAATGCCTGCTCTACT
+>31417
+TCAATGATCGCTGTGCTCAGTAGGA
+>31506
+TTTGTCTGACGTTAAAAAATATA
+>31567
+TCCACATTAGGAGGATTATTAGACAAC
+>31790
+AAACATAATAATTGATGGCGGAAGA
+>31872
+AAGGTAATCATAGAGCACCACGGTT
+>32157
+TTTCTGTGAATTCACATGCTGATGA
+>32192
+TTTCTCATGTATAAAATGCTCTGATGG
+>32223
+TATCTTGTTATTCTAGTGTCTTTGGTT
+>32338
+TGTGGGACTCGAGCCAAAATGGCAACCT
+>32497
+TGCGGTTGGACAATTTTTTTTTTATA
+>32506
+TTGTTGTTTGGAGGAAGTTCCTTT
+>32510
+TCTTCCGCCATCAATTATTATGTTTT
+>32522
+TACTTGACTTTTCTATAGAATCTGGT
+>32540
+TATCGTCAACATCCTCGAACGATCGAGA
+>32626
+TTGATCAAGGTAGGGTTGTCGC
+>32646
+TCTGTTAAACACCCCTGAATCGTGGAT
+>32657
+TTTGGACATTTTGCAGGTGATACAAT
+>32682
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>32716
+TATCTATAGTTCCGATTGGCCATCTC
+>32885
+GAAAGTGGGTATCTGTATTTTAGGC
+>32967
+TAAAGATACCATCTAACCTCCTTGGA
+>33075
+ACAGATATTAGTGTTTTTCAAGCAGC
+>33147
+ATCGCAATTATGGCATAACAGATTCGGA
+>33163
+TTCAGCGTCGCTTGATTGAATAGAT
+>33231
+TAAGAACTTCTGAGGTGAAGGGC
+>33242
+TTGGGTTTAGAAATTAAAATTAAGGC
+>33244
+TATAGAAAGTTACGAATATATTAGGA
+>33248
+TTTTTGATCAATTGGCACCGTGCGAA
+>33296
+TGCAGAGCTTCTTCCAACGTTGGCAAG
+>33318
+TAGATGTCTGCAGGAATAACGGA
+>33338
+TCGACTATTAATGGCTGTTAGAATT
+>33475
+CAAACTTATCGACCATCTCCTCAAACG
+>33601
+TTACCCTTTTTCCGGAGCGTTTGTGC
+>33611
+AATGATCGCTGTGCTCAGTATGACGG
+>33662
+TTTGATTCATTACAATTTACGCTGAA
+>33737
+CAGATATTAGTGTTTTTCAAGCAGC
+>33747
+TACATGTAAAGCAGCTGTGTGTGC
+>33815
+AGTTTTTGGAATCACTTGA
+>34032
+TGCGGACGTGTGCTCGCTGCGTGA
+>34200
+TAATATATGTATAATCTGCTTGGTG
+>34240
+TCTTTACGCCATATAAATCATTTCGA
+>34259
+ACAAATCATAAATTTGATGGGACGA
+>34268
+TCTTGTGGACTTCACTCTAG
+>34297
+TAAGTAAATAGTCCCCGCCTTATTGAGG
+>34337
+TGGATAATGAATGTTTTTGATTTGC
+>34569
+TCTAGGTTCTTCTGAAATCGTGGGA
+>34643
+TCGTGTAGACCGGATAAGATTTTTT
+>34697
+TCTAATACTGTGAAAGGGTGGGG
+>34808
+GTAGGATGTGCTCTGCGGTTTCCAC
+>34892
+TCTGTCGCAGTTGTAGCTTGCAATA
+>34896
+TGTAGTTGCCACTTATGCTGTCCA
+>35151
+TAGCAATGTCCGTCTGTCCGTATGA
+>35196
+TGTAAATGGTCAGCGAAAGCAAAGG
+>35211
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>35246
+CCTCAGAGAACGTCAGACCGCG
+>35272
+TGCAGAGCTTCTTCCAACGTTGG
+>35273
+TCTGGTAGTAAGAAAAATGTAGCTT
+>35286
+TATGTATGGATATGTATATTTATGGGT
+>35317
+TTTTGGTTTGATCGTCAGGTGGTC
+>35512
+GACGATAATAGTGAATTTTGGACA
+>35566
+TTCGAATTCGCGCCATTTCACAATC
+>35948
+TGGCCTGTATACGCTTTCTGTTG
+>36009
+TTGCAAAAGTCATATCTTGAGG
+>36059
+TAAGTTACTATGGATCCATAAGGGTA
+>36100
+TGATATGGGACTTGTAGCTTTTTTAAA
+>36121
+TGTCGTGGGCTGTGCGTTTGAGAA
+>36235
+TGCTACCTTTAGCTGCAAGATTAACT
+>36238
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>36523
+TGCGAGTCCGAGCTTTGAACGTGGG
+>36607
+TCTCAATGTAATGTCTTCTTTTTGGA
+>36624
+CGTTCTTTAAAACCACCAATGGGA
+>36776
+TACTCACGACCATTGTCATTTCTCAAG
+>36825
+CGTTCGAGGTCCACTTTCTTAGCGGA
+>36850
+TCTGATGTCGGCGAGAAAGGAGTCTCA
+>36872
+TCAAGGCCAGCACACAGTAACATGGT
+>36968
+ACATTGGAAATACCGCGGGACCGC
+>36994
+TATACGTTTTTTGACCTCTTCTCTTTGA
+>37110
+TGATAATGTAAACAAAGATAAAGGG
+>37210
+TCAGATGAAAGACAAATTAGAATTAAA
+>37224
+GACGATAATAGTGAATTTTGGACA
+>37226
+TCTGAAGTTGCCGCACTAGAGATGG
+>37339
+TATACAGTCCACTATATCGTTGTTTAA
+>37395
+TACAATTTACGCTGAATTTAAATGAA
+>37460
+CACAAAGTAACGTGCACCACCATTT
+>37766
+TCTGTGCAACTTTGTACGCGAAGTCAGC
+>37836
+TCAAGGATTAATGTAGGGGGGGGG
+>38096
+TTTGGAGGTAGAAGTCTTAGTGGCCGC
+>38100
+GACAAAATATGCCCTTCAATTTAGA
+>38113
+TATGGTCCAGAATGTAGCCTCGGC
+>38213
+TAACTGTTTTTAGTTCAAAGTCTCGAA
+>38224
+TTTCAGTTTTTATTGTTAGTCACAGG
+>38263
+TAACTGCGAAGTCGATCAGGTCCGA
+>38266
+ATCCGGACGATTGACGAGGAGCCCATT
+>38271
+TTATGTCAGTGTCGAAGGCGATCGAA
+>38373
+TCTATAGCCTTGGCGTAGGAACTCGCA
+>38384
+GCTACAACTGCGACAGAAAATTCGGA
+>38468
+TATTATCGTCCGCTGCTAAACTGC
+>38491
+TGCATACTTCCGTTCTCTTTTCGGGA
+>38673
+TTGTGTATTGTGATTCTGATTCGTG
+>38745
+TCCCTGGCCCGCTAGACAGCAGGA
+>38784
+TGTGCTAATAATTAAAGTCGACTGA
+>38873
+TGTCATACTTTCGTCAAAA
+>39154
+TAATAGTCAGGGCGCGAATTTTTAAAA
+>39325
+TTAGGACTTATTGAACTTTACGGTA
+>39359
+TATGCAAATCAAGTGTGACCGTAGCT
+>39450
+TTAAACAACGATATAGTGGACAGTA
+>39453
+TCTGCCGGATTTTGATCCAATCAAGG
+>39819
+GTGGAATTGAAAAAGAACCAGACACA
+>40040
+TTTTTGGAACTACCTGAGTCGGTT
+>40183
+TAACACAAAGCAGTATGATTTAATAAT
+>40189
+CAGCAAGCTGAGATGTACATTAGTATA
+>40374
+TACGTTTTCTTGCAGATCAAAAA
+>40445
+CGTTCTTTAAAACCACCAATGGGA
+>40467
+TACGCAGATTCCTGGGAGTTACAGGA
+>40488
+TGATTTGGGCTTGCATACTTGTACT
+>40797
+TATATTCGTGTTCATGTGTGAACAGC
+>40831
+TCTAAGAACTTCTGAGGTGAAGGGC
+>40951
+TTCTTCGTAAGTCAAAATAGTGTCGCC
+>40988
+TATGATTGATTGCTTGAGAGT
+>42668
+TGCTGCAGAGCTTCTTCCAACGTTGG
+>42751
+CCAAAGTCTGGTTGTCAGAAAATGTGC
+>42777
+AAGGAAGGAACCAAAGAAGCACAAACG
+>42804
+TAATTCTAATTTGTCTCTCATCTGA
+>42830
+TAGTATACTTATTAAGTCATTTGA
+>42857
+AGAGTATTCATCTTGAGGCGTGTG
+>42886
+TGAACAACGATTTATGTATATAAGAA
+>42899
+TACGATAATAGTGAATTTTGGACA
+>42936
+TTTGACTAAAAGTCGCTTGTTTTGGA
+>43050
+AATACATAACTCTGGACACAGGAGA
+>43219
+TACTTTCGTCAAAATGTTCAGGAGCT
+>43242
+TGACATGTCTATTTCCATGGGTTCGGA
+>43268
+GTGTATAAAAAAATTTATTGTTGAGCA
+>43299
+TCTGGATGATGGCTGATGCTCGTTG
+>43491
+TCTGATGACAATGAATTTTTTAGACA
+>43520
+TATTGTTGTAATTGCTGCCTCGGTTG
+>43654
+ATATGAACAAAGCAAAGACACTAGAA
+>43677
+CACGATTCATAACCCTCAGCTGAAG
+>43776
+GCTCCTGGCAACTCTGTGATGGAC
+>43903
+TATTTTGATGTTTAATGAA
+>43990
+TTTCTACTTCGTATTATTTTTATGA
+>44062
+AAACATAATAATTGATGGCGGAAGA
+>44071
+CACAGACGCAGTGGAAACCGCAGA
+>44078
+CGTGCGTCCGAGCAAAAGGTGGT
+>44187
+TTATGTAAGAATATTTGTCATTAGA
+>44239
+TATCAATGTTGACCGTAATACTCAA
+>44253
+TGGATATTGAATGTTTTTGATTTGC
+>44279
+TGTGATTTTCCCAATTTATATTAATACA
+>44374
+TAGTCGGAGTTGATGAGCTGCC
+>44412
+TCCGAATTTTCTGTCGCAGTTGTAGCT
+>44423
+TCGGCTTGGGTTTAGAAATTAAAAT
+>44558
+ACAAATCATAAATTTGGTGGGA
+>44637
+TTTTTTATCAATTGGCACCATGCGAA
+>44642
+TACTGTGTGCTGGCCTTGATGAAAGT
+>44688
+TAGATGTCTGCACGAATAACGGA
+>44945
+TGCTTCCGAGCAATCTACGTTGGTAAAA
+>45054
+TCTCATCTGACAATTTTTTAAAAGCGA
+>45088
+TCTGAAGCAGCGCTCACGGCAGAATGC
+>45203
+TGCCGGATTTTGATCCAATCAAGGGA
+>45263
+TATTGATTTTCCTATTTAGTTGAACA
+>45274
+AAAAGTCTGGATATTGTAGGATAGGA
+>45358
+CAACGCTGGACCTTGGACTCGAGGGC
+>45396
+CATCTTGTTATTCTAGTGTCTTTGGTA
+>45641
+GAACTAAGATCAGTCGACTGTAATTAT
+>45774
+CGACGAACTAGCAGCTCTGGTGT
+>45911
+CAGATATTAGTGTTTTTCAAGCAGC
+>45997
+TTAACCAGTCGGCGTTGTTTAAGTAGC
+>46031
+AAAGCGTCTACTTGAACAATGAGA
+>46054
+TTAGATCGTATTACTTGGGTGCTGG
+>46199
+TAAGCGTTAGGTCGTATTACTTGGGC
+>46255
+TAAGATTGAAAATTACTGTGGAGTAAT
+>46366
+TAAGGAATTGTCGGCCATTTAATGTGA
+>46387
+CAGAGCTTCTTCCAACGTTGGCAAG
+>46701
+TTGTATCTTTTTGCTTTTTATATT
+>46705
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>46783
+TCTCATCTGACAATTTTTTAAAAGCGA
+>46836
+TCATAAGGACAGACGGACAT
+>46892
+AAACATAATAATTGATGGCGGAAGA
+>47244
+TCTCTGTCCGCTCGCTTACGATGAGA
+>47290
+TTTCAGGAATGGGGTCGTCCCACTA
+>47348
+TGCAAAAGTAATATCAAAGACACTA
+>47359
+TTTAAAATTATAATAGTCAGGG
+>47427
+CATCTTGTTATTCTAGTGTCTTTGGT
+>47654
+TTAAACACTGAATTCGGTTTCGAAA
+>47656
+TTTCTGAAGAATCCTGTAACTCCC
+>47785
+TGTAGATGAGCGGCAAATGTGG
+>47827
+TTGATCAAGGTAGAGTTGTCGCGC
+>48184
+TTCAAGGATTAATGTAGGGGGGG
+>48576
+TTAACCCGGAGACTTGGGTGTGGGT
+>48764
+TGCAAAAGTAATATCAAAGACAATAGA
+>48782
+TGACAATGTAGTGAACGCCAGTGT
+>48893
+TATAAATGCCGTCTGATATTATTAAA
+>49017
+ACCGGATGTCATATCCAGCGTCGTGAA
+>49381
+TACAATGTAAATTCGTTTCTTCGATCA
+>49456
+TTTTGGTTTGATCGTCAGGTGGACGC
+>49484
+TCTGCTATCATTGACTCGATCATTGA
+>49569
+TCGATTGTATGATCAGTGGAAGTGGC
+>49602
+GAAATTGGCCAACATTAATTCGGAA
+>49628
+TACTTTCGTCAAAATGTTCAGGAGC
+>49715
+TTTGTCCGGGTGCTTCGAAAGAACTCT
+>49778
+CAATAGCGTCGCTGAGTAACAGTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_fastq.fa	Mon Apr 11 00:27:41 2022 +0000
@@ -0,0 +1,78 @@
+>HWI-176
+CGAGACTTTGAACTAAAAACAGT
+>HWI-277
+TCAGATGAGAGACAAATTAGAAT
+>HWI-458
+TTTCGAGGTTCCGAATTTTCTGTC
+>HWI-778
+TAGGATGTGCTCTGCGGTTTCCACT
+>HWI-895
+TTTAATTGGCGCAGTCGGTAGGATC
+>HWI-967
+GAGAATGACGGAACTGTAATA
+>HWI-974
+TTGCGAGGTCGTCCTGGGAGACCAG
+>HWI-1009
+AACGTTGGAAGAAGCTCTGCAGC
+>HWI-1011
+TTTAATGTTAACACGGACATTGACC
+>HWI-1014
+TCTCCGACTTGCTGAGCTGTTTCCGCCG
+>HWI-1106
+CATCTTGTTATTCTATTGTCTTTGGTC
+>HWI-1202
+AAAGTAAATTCTGATGACTTCAAAAT
+>HWI-1227
+TACCATGTAAATTCGTTTCTTCG
+>HWI-1262
+TATTAGCTCAAAGAACAGCTCGT
+>HWI-1297
+TTGCAGCAATATCGTCAACATCCTC
+>HWI-1520
+TAATATCGAAGCCGAACTGAGAACA
+>HWI-1584
+TAAAGTTATGACAAGAATTGATGTT
+>HWI-1590
+TCATATGCCAATTTCGTGTTTCGATG
+>HWI-1609
+TAAGATTGAAAATTACTGTGGAGTAAT
+>HWI-1745
+TTCATCCTGCTGCCGGAGC
+>HWI-1816
+CTTCCGTTATTCCTGCAGACA
+>HWI-1845
+TAATATCGAACCCGAACTGAGAA
+>HWI-1852
+TGGACACCGTCGTTCCACTTGAACT
+>HWI-1887
+TCAAGGTAGGGTTGTCGCGTGTATTT
+>HWI-1892
+TATTATCGTCCGCTGCTAAACTG
+>HWI-1936
+TAGCTGCAACTTCTGAAGTTATGGC
+>HWI-1997
+CAAAACAAGAATTTTTCGCATGGTGCC
+>HWI-2055
+TAAACTAATTCTGTCGGTTTTCTGT
+>HWI-2075
+AGACTTTGAACTAAAAACAGTTACCT
+>HWI-2244
+TAACTTCTTCTATTTTCGTGCGGGA
+>HWI-2253
+TAACTGCCCTCGATCACGCCTTCCCGA
+>HWI-2277
+TGACGACGACAGTATCGCAAGACGGT
+>HWI-2409
+GTCAGATGAGAGACAAATTAGAAT
+>HWI-2418
+TAATATCGAACCCGAACTGAGAAC
+>HWI-2490
+TTTATGACAACCCGATCGGACCTCACTC
+>HWI-2557
+TGTCCGGAGATCAAGAAGTGTTGGCAC
+>HWI-2670
+TCAGATGAGAGACAAATTAGAATTAAA
+>HWI-2719
+TATTATCGTCCGCTGCTAAACTGCTG
+>HWI-2834
+CCTAGGAGAAGGAAGTCAAGAAGGCC