Repository 'sr_bowtie_dataset_annotation'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/sr_bowtie_dataset_annotation

Changeset 9:6bf9de09aa74 (2022-04-11)
Previous changeset 8:3519c2de7fac (2022-04-09) Next changeset 10:fd4a60fc3fca (2022-11-15)
Commit message:
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit c8f13ba73552ccf7db7c22859b7fdc6ad121cdf0"
modified:
sr_bowtie_dataset_annotation.xml
test-data/unmatched_2.fa
test-data/unmatched_3.fa
added:
test-data/unmatched_4.fa
test-data/unmatched_5.fa
test-data/unmatched_fastq.fa
b
diff -r 3519c2de7fac -r 6bf9de09aa74 sr_bowtie_dataset_annotation.xml
--- a/sr_bowtie_dataset_annotation.xml Sat Apr 09 22:45:21 2022 +0000
+++ b/sr_bowtie_dataset_annotation.xml Mon Apr 11 00:27:41 2022 +0000
[
@@ -1,4 +1,4 @@
-<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.6">
+<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.7">
   <description>by iterative alignments with sRbowtie</description>
   <requirements>
         <requirement type="package" version="1.3.1">bowtie</requirement>
@@ -24,7 +24,9 @@
         #elif $input[0].is_of_type('fastq'):
             #set format = "-q"
         #end if
-
+        
+        mkdir unmatched_dir &&
+        
         #for $file in $input:
             #set sample=$file.element_identifier
             bowtie -p \${GALAXY_SLOTS:-4}
@@ -58,12 +60,15 @@
             #end for
             remaining=\$(( \$(wc -l < class_unmatched.fa)/2)) &&
             echo -e "$sample\tNot classified\t\${remaining}\t\${genome_aligned}" >> $output &&
+            cp class_unmatched.fa unmatched_dir/${sample}_unmatched.fasta &&
+            #if $format == '-q':
+                mv unmatched_dir/${sample}_unmatched.fasta unmatched_dir/${sample}_unmatched.fastq &&
+                sed -n '1~4s/^@/>/p;2~4p' unmatched_dir/${sample}_unmatched.fastq > unmatched_dir/${sample}_unmatched.fasta &&
+                rm unmatched_dir/${sample}_unmatched.fastq &&
+            #end if
         #end for
+        ls -la unmatched_dir &&
         Rscript $__tool_directory__/barplot.r --input $output --barplot $barplot
-        #if $format == '-q':
-            && mv class_unmatched.fa class_unmatched.fastq
-            && sed -n '1~4s/^@/>/p;2~4p' class_unmatched.fastq > class_unmatched.fa
-        #end if
         ]]></command>
   <inputs>
     <param name="input" type="data" multiple="True" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/>
@@ -96,7 +101,9 @@
 <!-- End of other bowtie index selections -->
    </inputs>
    <outputs>
-       <data format="fasta" name="unmatched" label="Annotate smRNAs: Unmatched reads" from_work_dir="class_unmatched.fa" />
+       <collection name="unmatched" type="list" format="fasta" label="Annotate smRNAs: Unmatched reads">
+           <discover_datasets pattern="__name_and_ext__" directory="unmatched_dir" />
+       </collection>
        <data format="tabular" name="output" label="Cascade Annotation Analysis">
            <actions>
                <action name="column_names" type="metadata" default="Sample,Reference Index,Number of reads, Total reads" />
@@ -106,6 +113,22 @@
     </outputs>
     <tests>
         <test>
+            <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" />
+            <param name="genomeSource" value="history" />
+            <param name="ownFile" value ="2L-tail.fa" ftype="fasta" />
+            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
+            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
+            <output name="output" ftype="tabular" file="multisample5_output.tab" />
+            <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" />
+            <output_collection name="unmatched" type="list" count="5">
+                <element name="sample5.fa_unmatched" file="unmatched_5.fa" ftype="fasta"/>
+                <element name="sample4.fa_unmatched" file="unmatched_4.fa" ftype="fasta"/>
+                <element name="sample3.fa_unmatched" file="unmatched_3.fa" ftype="fasta"/>
+                <element name="sample2.fa_unmatched" file="unmatched_2.fa" ftype="fasta"/>
+                <element name="sample1.fa_unmatched" file="unmatched_1.fa" ftype="fasta"/>
+            </output_collection>
+        </test>
+        <test>
             <param name="input" value ="sample1.fa" ftype="fasta" />
             <param name="genomeSource" value="history" />
             <param name="ownFile" value ="2L-tail.fa" ftype="fasta" />
@@ -113,7 +136,9 @@
             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
             <output name="output" ftype="tabular" file="sample1_output.tab" />
             <output name="barplot" ftype="pdf" file="sample1_output.pdf" compare="sim_size" delta="500"/>
-            <output name="unmatched" ftype="fasta" file="unmatched_1.fa" />
+            <output_collection name="unmatched" type="list">
+                <element name="sample1.fa_unmatched" file="unmatched_1.fa" ftype="fasta"/>
+            </output_collection>
         </test>
         <test>
             <param name="input" value ="sample.fastq" ftype="fastq" />
@@ -123,17 +148,9 @@
             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
             <output name="output" ftype="tabular" file="sample_output.tab" />
             <output name="barplot" ftype="pdf" file="sample_output.pdf" compare="sim_size" delta="500"/>
-            <output name="unmatched" ftype="fasta" file="unmatched_2.fa" />
-        </test>
-        <test>
-            <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" />
-            <param name="genomeSource" value="history" />
-            <param name="ownFile" value ="2L-tail.fa" ftype="fasta" />
-            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
-            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
-            <output name="output" ftype="tabular" file="multisample5_output.tab" />
-            <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" />
-            <output name="unmatched" ftype="fasta" file="unmatched_3.fa" />
+            <output_collection name="unmatched" type="list">
+                <element name="sample.fastq_unmatched" file="unmatched_fastq.fa" ftype="fasta"/>
+            </output_collection>
         </test>
     </tests>
   <help>
b
diff -r 3519c2de7fac -r 6bf9de09aa74 test-data/unmatched_2.fa
--- a/test-data/unmatched_2.fa Sat Apr 09 22:45:21 2022 +0000
+++ b/test-data/unmatched_2.fa Mon Apr 11 00:27:41 2022 +0000
b
b'@@ -1,78 +1,444 @@\n->HWI-176\n-CGAGACTTTGAACTAAAAACAGT\n->HWI-277\n+>30787\n+TAACTGTTTTTAGTTCAAAGTCTCGGA\n+>30836\n+TAAATATTTTTTTTGAAACA\n+>30977\n+CGACTGTAATTATTAGCACAATACT\n+>30985\n+GACGATATTGCTGCAATAGACCTTGA\n+>31000\n TCAGATGAGAGACAAATTAGAAT\n->HWI-458\n-TTTCGAGGTTCCGAATTTTCTGTC\n->HWI-778\n-TAGGATGTGCTCTGCGGTTTCCACT\n->HWI-895\n-TTTAATTGGCGCAGTCGGTAGGATC\n->HWI-967\n-GAGAATGACGGAACTGTAATA\n->HWI-974\n-TTGCGAGGTCGTCCTGGGAGACCAG\n->HWI-1009\n-AACGTTGGAAGAAGCTCTGCAGC\n->HWI-1011\n-TTTAATGTTAACACGGACATTGACC\n->HWI-1014\n-TCTCCGACTTGCTGAGCTGTTTCCGCCG\n->HWI-1106\n-CATCTTGTTATTCTATTGTCTTTGGTC\n->HWI-1202\n-AAAGTAAATTCTGATGACTTCAAAAT\n->HWI-1227\n-TACCATGTAAATTCGTTTCTTCG\n->HWI-1262\n-TATTAGCTCAAAGAACAGCTCGT\n->HWI-1297\n-TTGCAGCAATATCGTCAACATCCTC\n->HWI-1520\n-TAATATCGAAGCCGAACTGAGAACA\n->HWI-1584\n-TAAAGTTATGACAAGAATTGATGTT\n->HWI-1590\n-TCATATGCCAATTTCGTGTTTCGATG\n->HWI-1609\n+>31256\n+TAATTCGGAATGCCTGCTCTACT\n+>31417\n+TCAATGATCGCTGTGCTCAGTAGGA\n+>31506\n+TTTGTCTGACGTTAAAAAATATA\n+>31567\n+TCCACATTAGGAGGATTATTAGACAAC\n+>31790\n+AAACATAATAATTGATGGCGGAAGA\n+>31872\n+AAGGTAATCATAGAGCACCACGGTT\n+>32157\n+TTTCTGTGAATTCACATGCTGATGA\n+>32192\n+TTTCTCATGTATAAAATGCTCTGATGG\n+>32223\n+TATCTTGTTATTCTAGTGTCTTTGGTT\n+>32338\n+TGTGGGACTCGAGCCAAAATGGCAACCT\n+>32497\n+TGCGGTTGGACAATTTTTTTTTTATA\n+>32506\n+TTGTTGTTTGGAGGAAGTTCCTTT\n+>32510\n+TCTTCCGCCATCAATTATTATGTTTT\n+>32522\n+TACTTGACTTTTCTATAGAATCTGGT\n+>32540\n+TATCGTCAACATCCTCGAACGATCGAGA\n+>32626\n+TTGATCAAGGTAGGGTTGTCGC\n+>32646\n+TCTGTTAAACACCCCTGAATCGTGGAT\n+>32657\n+TTTGGACATTTTGCAGGTGATACAAT\n+>32682\n+TAACTGTTTTTAGTTCAAAGTCTCGGA\n+>32716\n+TATCTATAGTTCCGATTGGCCATCTC\n+>32885\n+GAAAGTGGGTATCTGTATTTTAGGC\n+>32967\n+TAAAGATACCATCTAACCTCCTTGGA\n+>33075\n+ACAGATATTAGTGTTTTTCAAGCAGC\n+>33147\n+ATCGCAATTATGGCATAACAGATTCGGA\n+>33163\n+TTCAGCGTCGCTTGATTGAATAGAT\n+>33231\n+TAAGAACTTCTGAGGTGAAGGGC\n+>33242\n+TTGGGTTTAGAAATTAAAATTAAGGC\n+>33244\n+TATAGAAAGTTACGAATATATTAGGA\n+>33248\n+TTTTTGATCAATTGGCACCGTGCGAA\n+>33296\n+TGCAGAGCTTCTTCCAACGTTGGCAAG\n+>33318\n+TAGATGTCTGCAGGAATAACGGA\n+>33338\n+TCGACTATTAATGGCTGTTAGAATT\n+>33475\n+CAAACTTATCGACCATCTCCTCAAACG\n+>33601\n+TTACCCTTTTTCCGGAGCGTTTGTGC\n+>33611\n+AATGATCGCTGTGCTCAGTATGACGG\n+>33662\n+TTTGATTCATTACAATTTACGCTGAA\n+>33737\n+CAGATATTAGTGTTTTTCAAGCAGC\n+>33747\n+TACATGTAAAGCAGCTGTGTGTGC\n+>33815\n+AGTTTTTGGAATCACTTGA\n+>34032\n+TGCGGACGTGTGCTCGCTGCGTGA\n+>34200\n+TAATATATGTATAATCTGCTTGGTG\n+>34240\n+TCTTTACGCCATATAAATCATTTCGA\n+>34259\n+ACAAATCATAAATTTGATGGGACGA\n+>34268\n+TCTTGTGGACTTCACTCTAG\n+>34297\n+TAAGTAAATAGTCCCCGCCTTATTGAGG\n+>34337\n+TGGATAATGAATGTTTTTGATTTGC\n+>34569\n+TCTAGGTTCTTCTGAAATCGTGGGA\n+>34643\n+TCGTGTAGACCGGATAAGATTTTTT\n+>34697\n+TCTAATACTGTGAAAGGGTGGGG\n+>34808\n+GTAGGATGTGCTCTGCGGTTTCCAC\n+>34892\n+TCTGTCGCAGTTGTAGCTTGCAATA\n+>34896\n+TGTAGTTGCCACTTATGCTGTCCA\n+>35151\n+TAGCAATGTCCGTCTGTCCGTATGA\n+>35196\n+TGTAAATGGTCAGCGAAAGCAAAGG\n+>35211\n+CAGATATTAGTGTTTTTCAAGCAGCGG\n+>35246\n+CCTCAGAGAACGTCAGACCGCG\n+>35272\n+TGCAGAGCTTCTTCCAACGTTGG\n+>35273\n+TCTGGTAGTAAGAAAAATGTAGCTT\n+>35286\n+TATGTATGGATATGTATATTTATGGGT\n+>35317\n+TTTTGGTTTGATCGTCAGGTGGTC\n+>35512\n+GACGATAATAGTGAATTTTGGACA\n+>35566\n+TTCGAATTCGCGCCATTTCACAATC\n+>35948\n+TGGCCTGTATACGCTTTCTGTTG\n+>36009\n+TTGCAAAAGTCATATCTTGAGG\n+>36059\n+TAAGTTACTATGGATCCATAAGGGTA\n+>36100\n+TGATATGGGACTTGTAGCTTTTTTAAA\n+>36121\n+TGTCGTGGGCTGTGCGTTTGAGAA\n+>36235\n+TGCTACCTTTAGCTGCAAGATTAACT\n+>36238\n+CAGATATTAGTGTTTTTCAAGCAGCGG\n+>36523\n+TGCGAGTCCGAGCTTTGAACGTGGG\n+>36607\n+TCTCAATGTAATGTCTTCTTTTTGGA\n+>36624\n+CGTTCTTTAAAACCACCAATGGGA\n+>36776\n+TACTCACGACCATTGTCATTTCTCAAG\n+>36825\n+CGTTCGAGGTCCACTTTCTTAGCGGA\n+>36850\n+TCTGATGTCGGCGAGAAAGGAGTCTCA\n+>36872\n+TCAAGGCCAGCACACAGTAACATGGT\n+>36968\n+ACATTGGAAATACCGCGGGACCGC\n+>36994\n+TATACGTTTTTTGACCTCTTCTCTTTGA\n+>37110\n+TGATAATGTAAACAAAGATAAAGGG\n+>37210\n+TCAGATGAAAGACAAATTAGAATTAAA\n+>37224\n+GACGATAATAGTGAATTTTGGACA\n+>37226\n+TCTGAAGTTGCCGCACTAGAGATGG\n+>37339\n+TATACAGTCCACTATATCGTTGTTTAA\n+>37395\n+TACAATTTACGCTGAATTTAAATGAA\n+>37460\n+CACAAAGTAACGTGCACCACCATTT\n+>37766\n+TCTGTGCAACTTTGTACGCGAAGTCAGC\n+>37836\n+TCAAGGATTAATGTAGGGGGGGGG\n+>38096\n+TTTGGAGGTAGAAGTCTTAGTGGCCGC\n+>38100\n+GACAAAATATGCCCTTCAATTTAGA\n+>38113\n+TATGG'..b'AGTG\n+>41162\n+TACGGATTGCAGCGGCTAG\n+>41183\n+TTTTTTGGCACACGATTTTTTGGACGT\n+>41227\n+TAATATATGTATACTCTGCTTGGTG\n+>41265\n+TTTATGATTTTTGGTAATA\n+>41365\n+TAGGAGGGTTCCACAACTATTTCGGGG\n+>41837\n+TAGTTTGACACTGTTTGGAGACGTGG\n+>42053\n+TCTGTGGTCGAATCGAAGGAGTGC\n+>42113\n+TAATAGATCGCTCACCTGTTCCTGG\n+>42392\n+CGGCATCGGAAAACTCCCAGCGGGGC\n+>42504\n+TAACGTTATTATTATTTGAAAATAGAA\n+>42566\n+TTTGTTGGGTAGGAACTTTACTGC\n+>42668\n+TGCTGCAGAGCTTCTTCCAACGTTGG\n+>42751\n+CCAAAGTCTGGTTGTCAGAAAATGTGC\n+>42777\n+AAGGAAGGAACCAAAGAAGCACAAACG\n+>42804\n+TAATTCTAATTTGTCTCTCATCTGA\n+>42830\n+TAGTATACTTATTAAGTCATTTGA\n+>42857\n+AGAGTATTCATCTTGAGGCGTGTG\n+>42886\n+TGAACAACGATTTATGTATATAAGAA\n+>42899\n+TACGATAATAGTGAATTTTGGACA\n+>42936\n+TTTGACTAAAAGTCGCTTGTTTTGGA\n+>43050\n+AATACATAACTCTGGACACAGGAGA\n+>43219\n+TACTTTCGTCAAAATGTTCAGGAGCT\n+>43242\n+TGACATGTCTATTTCCATGGGTTCGGA\n+>43268\n+GTGTATAAAAAAATTTATTGTTGAGCA\n+>43299\n+TCTGGATGATGGCTGATGCTCGTTG\n+>43491\n+TCTGATGACAATGAATTTTTTAGACA\n+>43520\n+TATTGTTGTAATTGCTGCCTCGGTTG\n+>43654\n+ATATGAACAAAGCAAAGACACTAGAA\n+>43677\n+CACGATTCATAACCCTCAGCTGAAG\n+>43776\n+GCTCCTGGCAACTCTGTGATGGAC\n+>43903\n+TATTTTGATGTTTAATGAA\n+>43990\n+TTTCTACTTCGTATTATTTTTATGA\n+>44062\n+AAACATAATAATTGATGGCGGAAGA\n+>44071\n+CACAGACGCAGTGGAAACCGCAGA\n+>44078\n+CGTGCGTCCGAGCAAAAGGTGGT\n+>44187\n+TTATGTAAGAATATTTGTCATTAGA\n+>44239\n+TATCAATGTTGACCGTAATACTCAA\n+>44253\n+TGGATATTGAATGTTTTTGATTTGC\n+>44279\n+TGTGATTTTCCCAATTTATATTAATACA\n+>44374\n+TAGTCGGAGTTGATGAGCTGCC\n+>44412\n+TCCGAATTTTCTGTCGCAGTTGTAGCT\n+>44423\n+TCGGCTTGGGTTTAGAAATTAAAAT\n+>44558\n+ACAAATCATAAATTTGGTGGGA\n+>44637\n+TTTTTTATCAATTGGCACCATGCGAA\n+>44642\n+TACTGTGTGCTGGCCTTGATGAAAGT\n+>44688\n+TAGATGTCTGCACGAATAACGGA\n+>44945\n+TGCTTCCGAGCAATCTACGTTGGTAAAA\n+>45054\n+TCTCATCTGACAATTTTTTAAAAGCGA\n+>45088\n+TCTGAAGCAGCGCTCACGGCAGAATGC\n+>45203\n+TGCCGGATTTTGATCCAATCAAGGGA\n+>45263\n+TATTGATTTTCCTATTTAGTTGAACA\n+>45274\n+AAAAGTCTGGATATTGTAGGATAGGA\n+>45358\n+CAACGCTGGACCTTGGACTCGAGGGC\n+>45396\n+CATCTTGTTATTCTAGTGTCTTTGGTA\n+>45641\n+GAACTAAGATCAGTCGACTGTAATTAT\n+>45774\n+CGACGAACTAGCAGCTCTGGTGT\n+>45911\n+CAGATATTAGTGTTTTTCAAGCAGC\n+>45997\n+TTAACCAGTCGGCGTTGTTTAAGTAGC\n+>46031\n+AAAGCGTCTACTTGAACAATGAGA\n+>46054\n+TTAGATCGTATTACTTGGGTGCTGG\n+>46199\n+TAAGCGTTAGGTCGTATTACTTGGGC\n+>46255\n TAAGATTGAAAATTACTGTGGAGTAAT\n->HWI-1745\n-TTCATCCTGCTGCCGGAGC\n->HWI-1816\n-CTTCCGTTATTCCTGCAGACA\n->HWI-1845\n-TAATATCGAACCCGAACTGAGAA\n->HWI-1852\n-TGGACACCGTCGTTCCACTTGAACT\n->HWI-1887\n-TCAAGGTAGGGTTGTCGCGTGTATTT\n->HWI-1892\n-TATTATCGTCCGCTGCTAAACTG\n->HWI-1936\n-TAGCTGCAACTTCTGAAGTTATGGC\n->HWI-1997\n-CAAAACAAGAATTTTTCGCATGGTGCC\n->HWI-2055\n-TAAACTAATTCTGTCGGTTTTCTGT\n->HWI-2075\n-AGACTTTGAACTAAAAACAGTTACCT\n->HWI-2244\n-TAACTTCTTCTATTTTCGTGCGGGA\n->HWI-2253\n-TAACTGCCCTCGATCACGCCTTCCCGA\n->HWI-2277\n-TGACGACGACAGTATCGCAAGACGGT\n->HWI-2409\n-GTCAGATGAGAGACAAATTAGAAT\n->HWI-2418\n-TAATATCGAACCCGAACTGAGAAC\n->HWI-2490\n-TTTATGACAACCCGATCGGACCTCACTC\n->HWI-2557\n-TGTCCGGAGATCAAGAAGTGTTGGCAC\n->HWI-2670\n-TCAGATGAGAGACAAATTAGAATTAAA\n->HWI-2719\n-TATTATCGTCCGCTGCTAAACTGCTG\n->HWI-2834\n-CCTAGGAGAAGGAAGTCAAGAAGGCC\n+>46366\n+TAAGGAATTGTCGGCCATTTAATGTGA\n+>46387\n+CAGAGCTTCTTCCAACGTTGGCAAG\n+>46701\n+TTGTATCTTTTTGCTTTTTATATT\n+>46705\n+TAACTGTTTTTAGTTCAAAGTCTCGGA\n+>46783\n+TCTCATCTGACAATTTTTTAAAAGCGA\n+>46836\n+TCATAAGGACAGACGGACAT\n+>46892\n+AAACATAATAATTGATGGCGGAAGA\n+>47244\n+TCTCTGTCCGCTCGCTTACGATGAGA\n+>47290\n+TTTCAGGAATGGGGTCGTCCCACTA\n+>47348\n+TGCAAAAGTAATATCAAAGACACTA\n+>47359\n+TTTAAAATTATAATAGTCAGGG\n+>47427\n+CATCTTGTTATTCTAGTGTCTTTGGT\n+>47654\n+TTAAACACTGAATTCGGTTTCGAAA\n+>47656\n+TTTCTGAAGAATCCTGTAACTCCC\n+>47785\n+TGTAGATGAGCGGCAAATGTGG\n+>47827\n+TTGATCAAGGTAGAGTTGTCGCGC\n+>48184\n+TTCAAGGATTAATGTAGGGGGGG\n+>48576\n+TTAACCCGGAGACTTGGGTGTGGGT\n+>48764\n+TGCAAAAGTAATATCAAAGACAATAGA\n+>48782\n+TGACAATGTAGTGAACGCCAGTGT\n+>48893\n+TATAAATGCCGTCTGATATTATTAAA\n+>49017\n+ACCGGATGTCATATCCAGCGTCGTGAA\n+>49381\n+TACAATGTAAATTCGTTTCTTCGATCA\n+>49456\n+TTTTGGTTTGATCGTCAGGTGGACGC\n+>49484\n+TCTGCTATCATTGACTCGATCATTGA\n+>49569\n+TCGATTGTATGATCAGTGGAAGTGGC\n+>49602\n+GAAATTGGCCAACATTAATTCGGAA\n+>49628\n+TACTTTCGTCAAAATGTTCAGGAGC\n+>49715\n+TTTGTCCGGGTGCTTCGAAAGAACTCT\n+>49778\n+CAATAGCGTCGCTGAGTAACAGTG\n'
b
diff -r 3519c2de7fac -r 6bf9de09aa74 test-data/unmatched_3.fa
--- a/test-data/unmatched_3.fa Sat Apr 09 22:45:21 2022 +0000
+++ b/test-data/unmatched_3.fa Mon Apr 11 00:27:41 2022 +0000
b
@@ -1,177 +1,3 @@
->25207
-TCGCACTAGCTTCGGTCGTCTCGGA
->25229
-TCTGTTCGAGGTTCTTTATTTGAA
->25283
-TAAGGGAGCGAGATCGTTCGATGCA
->25363
-TAGTGTTGGGTGTAGGCCAGGGAG
->25376
-TCCGCCGACAATGTTGCAACATGA
->25435
-TTTCTTCAAGCTGCGCGTTTTTCGG
->25593
-TCGAAAACTTCTTCGCACTCTTCGTT
->25660
-TCTCGCTGCGTGAACGATGAAGGC
->25968
-TTTGCTAGGACTGCTGCATAAG
->26220
-GCCCGTGTCGATGAATGCTTCAAACT
->26261
-TATTATCGTCCGCTGCTAAACTGC
->26543
-CTTATGTATAAAAAGCTCTGATGGA
->26610
-TCCTTAAGTTCGTTGATTTGGGCT
->26675
-CAATATCGTCAACATCCTCGAACGAT
->26873
-TTGGGAGCTGAATCCCGTTACGGTA
->26978
-CGCGCATTAGCTCAAATCTAGTTGGA
->27220
-TGTTTATTTGTCAAGTTTAGATAATA
->27397
-TAGTTCACAGCTGTATGTCCAGATGGGT
->27869
-TATGGTCCAGAATGTAGCCTCGGC
->27878
-TATTAATCGGGCCACAAATATCGGTA
->28035
-GGTATTCTTTGCGAGGTCGTCCTGG
->28154
-AAAGACGAGAACGCGTATATGTGTGC
->28319
-TGACCAACCTTAAAAGATCGGGGT
->28387
-TATTACTATTTCTAAGCTTTGTTTAAA
->28594
-TTTTGAGGTTGGTCAAGAAGTTGTT
->28608
-TAAGATTGAAAATTACTGTGGAGT
->28668
-TACAGATTCTAGAGACAAAGACGC
->28674
-TAAGTACATGCGCCCAGCCGCCGTGA
->28836
-TCTGGTTAAGGTCGGAATACTCGTCT
->29056
-TGCTTTACATACCCTTTGGTGCCC
->29323
-TTTTGCTGCAGAGCTTCTTCCAACGT
->29342
-TATGTATGGATATGTATATTTATGGT
->29387
-TGATAATCGACCTCTTCCATCGTTGT
->29423
-TCTAAGAACTTCTGAGGTGAAGG
->29462
-TGCATTTCAATCGGAAGAGTACTCTG
->29492
-TAAAACACAAATCTCGACATACAGA
->29703
-GTACAGGTTCTGATGACAATG
->29785
-TCAGATGAAAGACAAATTAGAATT
->30053
-TAAGAACTTCTGAGATGAAGGGC
->30096
-TGGATATTGAATGTTTTTGATTTGC
->30136
-TGTAGTCGTCGTATGTCCGGA
->30272
-TGCATTCGTGGATTCGCATTCGAGA
->30497
-TGACAAAATATGCCCTTCACCTCAGA
->30787
-TAACTGTTTTTAGTTCAAAGTCTCGGA
->30836
-TAAATATTTTTTTTGAAACA
->30977
-CGACTGTAATTATTAGCACAATACT
->30985
-GACGATATTGCTGCAATAGACCTTGA
->31000
-TCAGATGAGAGACAAATTAGAAT
->31256
-TAATTCGGAATGCCTGCTCTACT
->31417
-TCAATGATCGCTGTGCTCAGTAGGA
->31506
-TTTGTCTGACGTTAAAAAATATA
->31567
-TCCACATTAGGAGGATTATTAGACAAC
->31790
-AAACATAATAATTGATGGCGGAAGA
->31872
-AAGGTAATCATAGAGCACCACGGTT
->32157
-TTTCTGTGAATTCACATGCTGATGA
->32192
-TTTCTCATGTATAAAATGCTCTGATGG
->32223
-TATCTTGTTATTCTAGTGTCTTTGGTT
->32338
-TGTGGGACTCGAGCCAAAATGGCAACCT
->32497
-TGCGGTTGGACAATTTTTTTTTTATA
->32506
-TTGTTGTTTGGAGGAAGTTCCTTT
->32510
-TCTTCCGCCATCAATTATTATGTTTT
->32522
-TACTTGACTTTTCTATAGAATCTGGT
->32540
-TATCGTCAACATCCTCGAACGATCGAGA
->32626
-TTGATCAAGGTAGGGTTGTCGC
->32646
-TCTGTTAAACACCCCTGAATCGTGGAT
->32657
-TTTGGACATTTTGCAGGTGATACAAT
->32682
-TAACTGTTTTTAGTTCAAAGTCTCGGA
->32716
-TATCTATAGTTCCGATTGGCCATCTC
->32885
-GAAAGTGGGTATCTGTATTTTAGGC
->32967
-TAAAGATACCATCTAACCTCCTTGGA
->33075
-ACAGATATTAGTGTTTTTCAAGCAGC
->33147
-ATCGCAATTATGGCATAACAGATTCGGA
->33163
-TTCAGCGTCGCTTGATTGAATAGAT
->33231
-TAAGAACTTCTGAGGTGAAGGGC
->33242
-TTGGGTTTAGAAATTAAAATTAAGGC
->33244
-TATAGAAAGTTACGAATATATTAGGA
->33248
-TTTTTGATCAATTGGCACCGTGCGAA
->33296
-TGCAGAGCTTCTTCCAACGTTGGCAAG
->33318
-TAGATGTCTGCAGGAATAACGGA
->33338
-TCGACTATTAATGGCTGTTAGAATT
->33475
-CAAACTTATCGACCATCTCCTCAAACG
->33601
-TTACCCTTTTTCCGGAGCGTTTGTGC
->33611
-AATGATCGCTGTGCTCAGTATGACGG
->33662
-TTTGATTCATTACAATTTACGCTGAA
->33737
-CAGATATTAGTGTTTTTCAAGCAGC
->33747
-TACATGTAAAGCAGCTGTGTGTGC
->33815
-AGTTTTTGGAATCACTTGA
 >34032
 TGCGGACGTGTGCTCGCTGCGTGA
 >34200
b
diff -r 3519c2de7fac -r 6bf9de09aa74 test-data/unmatched_4.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_4.fa Mon Apr 11 00:27:41 2022 +0000
b
@@ -0,0 +1,332 @@
+>30787
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>30836
+TAAATATTTTTTTTGAAACA
+>30977
+CGACTGTAATTATTAGCACAATACT
+>30985
+GACGATATTGCTGCAATAGACCTTGA
+>31000
+TCAGATGAGAGACAAATTAGAAT
+>31256
+TAATTCGGAATGCCTGCTCTACT
+>31417
+TCAATGATCGCTGTGCTCAGTAGGA
+>31506
+TTTGTCTGACGTTAAAAAATATA
+>31567
+TCCACATTAGGAGGATTATTAGACAAC
+>31790
+AAACATAATAATTGATGGCGGAAGA
+>31872
+AAGGTAATCATAGAGCACCACGGTT
+>32157
+TTTCTGTGAATTCACATGCTGATGA
+>32192
+TTTCTCATGTATAAAATGCTCTGATGG
+>32223
+TATCTTGTTATTCTAGTGTCTTTGGTT
+>32338
+TGTGGGACTCGAGCCAAAATGGCAACCT
+>32497
+TGCGGTTGGACAATTTTTTTTTTATA
+>32506
+TTGTTGTTTGGAGGAAGTTCCTTT
+>32510
+TCTTCCGCCATCAATTATTATGTTTT
+>32522
+TACTTGACTTTTCTATAGAATCTGGT
+>32540
+TATCGTCAACATCCTCGAACGATCGAGA
+>32626
+TTGATCAAGGTAGGGTTGTCGC
+>32646
+TCTGTTAAACACCCCTGAATCGTGGAT
+>32657
+TTTGGACATTTTGCAGGTGATACAAT
+>32682
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>32716
+TATCTATAGTTCCGATTGGCCATCTC
+>32885
+GAAAGTGGGTATCTGTATTTTAGGC
+>32967
+TAAAGATACCATCTAACCTCCTTGGA
+>33075
+ACAGATATTAGTGTTTTTCAAGCAGC
+>33147
+ATCGCAATTATGGCATAACAGATTCGGA
+>33163
+TTCAGCGTCGCTTGATTGAATAGAT
+>33231
+TAAGAACTTCTGAGGTGAAGGGC
+>33242
+TTGGGTTTAGAAATTAAAATTAAGGC
+>33244
+TATAGAAAGTTACGAATATATTAGGA
+>33248
+TTTTTGATCAATTGGCACCGTGCGAA
+>33296
+TGCAGAGCTTCTTCCAACGTTGGCAAG
+>33318
+TAGATGTCTGCAGGAATAACGGA
+>33338
+TCGACTATTAATGGCTGTTAGAATT
+>33475
+CAAACTTATCGACCATCTCCTCAAACG
+>33601
+TTACCCTTTTTCCGGAGCGTTTGTGC
+>33611
+AATGATCGCTGTGCTCAGTATGACGG
+>33662
+TTTGATTCATTACAATTTACGCTGAA
+>33737
+CAGATATTAGTGTTTTTCAAGCAGC
+>33747
+TACATGTAAAGCAGCTGTGTGTGC
+>33815
+AGTTTTTGGAATCACTTGA
+>34032
+TGCGGACGTGTGCTCGCTGCGTGA
+>34200
+TAATATATGTATAATCTGCTTGGTG
+>34240
+TCTTTACGCCATATAAATCATTTCGA
+>34259
+ACAAATCATAAATTTGATGGGACGA
+>34268
+TCTTGTGGACTTCACTCTAG
+>34297
+TAAGTAAATAGTCCCCGCCTTATTGAGG
+>34337
+TGGATAATGAATGTTTTTGATTTGC
+>34569
+TCTAGGTTCTTCTGAAATCGTGGGA
+>34643
+TCGTGTAGACCGGATAAGATTTTTT
+>34697
+TCTAATACTGTGAAAGGGTGGGG
+>34808
+GTAGGATGTGCTCTGCGGTTTCCAC
+>34892
+TCTGTCGCAGTTGTAGCTTGCAATA
+>34896
+TGTAGTTGCCACTTATGCTGTCCA
+>35151
+TAGCAATGTCCGTCTGTCCGTATGA
+>35196
+TGTAAATGGTCAGCGAAAGCAAAGG
+>35211
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>35246
+CCTCAGAGAACGTCAGACCGCG
+>35272
+TGCAGAGCTTCTTCCAACGTTGG
+>35273
+TCTGGTAGTAAGAAAAATGTAGCTT
+>35286
+TATGTATGGATATGTATATTTATGGGT
+>35317
+TTTTGGTTTGATCGTCAGGTGGTC
+>35512
+GACGATAATAGTGAATTTTGGACA
+>35566
+TTCGAATTCGCGCCATTTCACAATC
+>35948
+TGGCCTGTATACGCTTTCTGTTG
+>36009
+TTGCAAAAGTCATATCTTGAGG
+>36059
+TAAGTTACTATGGATCCATAAGGGTA
+>36100
+TGATATGGGACTTGTAGCTTTTTTAAA
+>36121
+TGTCGTGGGCTGTGCGTTTGAGAA
+>36235
+TGCTACCTTTAGCTGCAAGATTAACT
+>36238
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>36523
+TGCGAGTCCGAGCTTTGAACGTGGG
+>36607
+TCTCAATGTAATGTCTTCTTTTTGGA
+>36624
+CGTTCTTTAAAACCACCAATGGGA
+>36776
+TACTCACGACCATTGTCATTTCTCAAG
+>36825
+CGTTCGAGGTCCACTTTCTTAGCGGA
+>36850
+TCTGATGTCGGCGAGAAAGGAGTCTCA
+>36872
+TCAAGGCCAGCACACAGTAACATGGT
+>36968
+ACATTGGAAATACCGCGGGACCGC
+>36994
+TATACGTTTTTTGACCTCTTCTCTTTGA
+>37110
+TGATAATGTAAACAAAGATAAAGGG
+>37210
+TCAGATGAAAGACAAATTAGAATTAAA
+>37224
+GACGATAATAGTGAATTTTGGACA
+>37226
+TCTGAAGTTGCCGCACTAGAGATGG
+>37339
+TATACAGTCCACTATATCGTTGTTTAA
+>37395
+TACAATTTACGCTGAATTTAAATGAA
+>37460
+CACAAAGTAACGTGCACCACCATTT
+>37766
+TCTGTGCAACTTTGTACGCGAAGTCAGC
+>37836
+TCAAGGATTAATGTAGGGGGGGGG
+>38096
+TTTGGAGGTAGAAGTCTTAGTGGCCGC
+>38100
+GACAAAATATGCCCTTCAATTTAGA
+>38113
+TATGGTCCAGAATGTAGCCTCGGC
+>38213
+TAACTGTTTTTAGTTCAAAGTCTCGAA
+>38224
+TTTCAGTTTTTATTGTTAGTCACAGG
+>38263
+TAACTGCGAAGTCGATCAGGTCCGA
+>38266
+ATCCGGACGATTGACGAGGAGCCCATT
+>38271
+TTATGTCAGTGTCGAAGGCGATCGAA
+>38373
+TCTATAGCCTTGGCGTAGGAACTCGCA
+>38384
+GCTACAACTGCGACAGAAAATTCGGA
+>38468
+TATTATCGTCCGCTGCTAAACTGC
+>38491
+TGCATACTTCCGTTCTCTTTTCGGGA
+>38673
+TTGTGTATTGTGATTCTGATTCGTG
+>38745
+TCCCTGGCCCGCTAGACAGCAGGA
+>38784
+TGTGCTAATAATTAAAGTCGACTGA
+>38873
+TGTCATACTTTCGTCAAAA
+>39154
+TAATAGTCAGGGCGCGAATTTTTAAAA
+>39325
+TTAGGACTTATTGAACTTTACGGTA
+>39359
+TATGCAAATCAAGTGTGACCGTAGCT
+>39450
+TTAAACAACGATATAGTGGACAGTA
+>39453
+TCTGCCGGATTTTGATCCAATCAAGG
+>39819
+GTGGAATTGAAAAAGAACCAGACACA
+>40040
+TTTTTGGAACTACCTGAGTCGGTT
+>40183
+TAACACAAAGCAGTATGATTTAATAAT
+>40189
+CAGCAAGCTGAGATGTACATTAGTATA
+>40374
+TACGTTTTCTTGCAGATCAAAAA
+>40445
+CGTTCTTTAAAACCACCAATGGGA
+>40467
+TACGCAGATTCCTGGGAGTTACAGGA
+>40488
+TGATTTGGGCTTGCATACTTGTACT
+>40797
+TATATTCGTGTTCATGTGTGAACAGC
+>40831
+TCTAAGAACTTCTGAGGTGAAGGGC
+>40951
+TTCTTCGTAAGTCAAAATAGTGTCGCC
+>40988
+TATGATTGATTGCTTGAGAGT
+>40998
+TGATAGAGCTGCATTTGAATTAACGG
+>41061
+CTCTTTCCGCTCACTCCCGCTGAGA
+>41076
+TGCATTAAGAAGATTTAGGATCC
+>41140
+TCAAGGATTAATGTAGGGGGGG
+>41143
+CAATAGCGTCGCTGAGTAACAGTG
+>41162
+TACGGATTGCAGCGGCTAG
+>41183
+TTTTTTGGCACACGATTTTTTGGACGT
+>41227
+TAATATATGTATACTCTGCTTGGTG
+>41265
+TTTATGATTTTTGGTAATA
+>41365
+TAGGAGGGTTCCACAACTATTTCGGGG
+>41837
+TAGTTTGACACTGTTTGGAGACGTGG
+>42053
+TCTGTGGTCGAATCGAAGGAGTGC
+>42113
+TAATAGATCGCTCACCTGTTCCTGG
+>42392
+CGGCATCGGAAAACTCCCAGCGGGGC
+>42504
+TAACGTTATTATTATTTGAAAATAGAA
+>42566
+TTTGTTGGGTAGGAACTTTACTGC
+>42668
+TGCTGCAGAGCTTCTTCCAACGTTGG
+>42751
+CCAAAGTCTGGTTGTCAGAAAATGTGC
+>42777
+AAGGAAGGAACCAAAGAAGCACAAACG
+>42804
+TAATTCTAATTTGTCTCTCATCTGA
+>42830
+TAGTATACTTATTAAGTCATTTGA
+>42857
+AGAGTATTCATCTTGAGGCGTGTG
+>42886
+TGAACAACGATTTATGTATATAAGAA
+>42899
+TACGATAATAGTGAATTTTGGACA
+>42936
+TTTGACTAAAAGTCGCTTGTTTTGGA
+>43050
+AATACATAACTCTGGACACAGGAGA
+>43219
+TACTTTCGTCAAAATGTTCAGGAGCT
+>43242
+TGACATGTCTATTTCCATGGGTTCGGA
+>43268
+GTGTATAAAAAAATTTATTGTTGAGCA
+>43299
+TCTGGATGATGGCTGATGCTCGTTG
+>43491
+TCTGATGACAATGAATTTTTTAGACA
+>43520
+TATTGTTGTAATTGCTGCCTCGGTTG
+>43654
+ATATGAACAAAGCAAAGACACTAGAA
+>43677
+CACGATTCATAACCCTCAGCTGAAG
+>43776
+GCTCCTGGCAACTCTGTGATGGAC
+>43903
+TATTTTGATGTTTAATGAA
+>43990
+TTTCTACTTCGTATTATTTTTATGA
+>44062
+AAACATAATAATTGATGGCGGAAGA
+>44071
+CACAGACGCAGTGGAAACCGCAGA
+>44078
+CGTGCGTCCGAGCAAAAGGTGGT
+>44187
+TTATGTAAGAATATTTGTCATTAGA
b
diff -r 3519c2de7fac -r 6bf9de09aa74 test-data/unmatched_5.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_5.fa Mon Apr 11 00:27:41 2022 +0000
b
@@ -0,0 +1,412 @@
+>30787
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>30836
+TAAATATTTTTTTTGAAACA
+>30977
+CGACTGTAATTATTAGCACAATACT
+>30985
+GACGATATTGCTGCAATAGACCTTGA
+>31000
+TCAGATGAGAGACAAATTAGAAT
+>31256
+TAATTCGGAATGCCTGCTCTACT
+>31417
+TCAATGATCGCTGTGCTCAGTAGGA
+>31506
+TTTGTCTGACGTTAAAAAATATA
+>31567
+TCCACATTAGGAGGATTATTAGACAAC
+>31790
+AAACATAATAATTGATGGCGGAAGA
+>31872
+AAGGTAATCATAGAGCACCACGGTT
+>32157
+TTTCTGTGAATTCACATGCTGATGA
+>32192
+TTTCTCATGTATAAAATGCTCTGATGG
+>32223
+TATCTTGTTATTCTAGTGTCTTTGGTT
+>32338
+TGTGGGACTCGAGCCAAAATGGCAACCT
+>32497
+TGCGGTTGGACAATTTTTTTTTTATA
+>32506
+TTGTTGTTTGGAGGAAGTTCCTTT
+>32510
+TCTTCCGCCATCAATTATTATGTTTT
+>32522
+TACTTGACTTTTCTATAGAATCTGGT
+>32540
+TATCGTCAACATCCTCGAACGATCGAGA
+>32626
+TTGATCAAGGTAGGGTTGTCGC
+>32646
+TCTGTTAAACACCCCTGAATCGTGGAT
+>32657
+TTTGGACATTTTGCAGGTGATACAAT
+>32682
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>32716
+TATCTATAGTTCCGATTGGCCATCTC
+>32885
+GAAAGTGGGTATCTGTATTTTAGGC
+>32967
+TAAAGATACCATCTAACCTCCTTGGA
+>33075
+ACAGATATTAGTGTTTTTCAAGCAGC
+>33147
+ATCGCAATTATGGCATAACAGATTCGGA
+>33163
+TTCAGCGTCGCTTGATTGAATAGAT
+>33231
+TAAGAACTTCTGAGGTGAAGGGC
+>33242
+TTGGGTTTAGAAATTAAAATTAAGGC
+>33244
+TATAGAAAGTTACGAATATATTAGGA
+>33248
+TTTTTGATCAATTGGCACCGTGCGAA
+>33296
+TGCAGAGCTTCTTCCAACGTTGGCAAG
+>33318
+TAGATGTCTGCAGGAATAACGGA
+>33338
+TCGACTATTAATGGCTGTTAGAATT
+>33475
+CAAACTTATCGACCATCTCCTCAAACG
+>33601
+TTACCCTTTTTCCGGAGCGTTTGTGC
+>33611
+AATGATCGCTGTGCTCAGTATGACGG
+>33662
+TTTGATTCATTACAATTTACGCTGAA
+>33737
+CAGATATTAGTGTTTTTCAAGCAGC
+>33747
+TACATGTAAAGCAGCTGTGTGTGC
+>33815
+AGTTTTTGGAATCACTTGA
+>34032
+TGCGGACGTGTGCTCGCTGCGTGA
+>34200
+TAATATATGTATAATCTGCTTGGTG
+>34240
+TCTTTACGCCATATAAATCATTTCGA
+>34259
+ACAAATCATAAATTTGATGGGACGA
+>34268
+TCTTGTGGACTTCACTCTAG
+>34297
+TAAGTAAATAGTCCCCGCCTTATTGAGG
+>34337
+TGGATAATGAATGTTTTTGATTTGC
+>34569
+TCTAGGTTCTTCTGAAATCGTGGGA
+>34643
+TCGTGTAGACCGGATAAGATTTTTT
+>34697
+TCTAATACTGTGAAAGGGTGGGG
+>34808
+GTAGGATGTGCTCTGCGGTTTCCAC
+>34892
+TCTGTCGCAGTTGTAGCTTGCAATA
+>34896
+TGTAGTTGCCACTTATGCTGTCCA
+>35151
+TAGCAATGTCCGTCTGTCCGTATGA
+>35196
+TGTAAATGGTCAGCGAAAGCAAAGG
+>35211
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>35246
+CCTCAGAGAACGTCAGACCGCG
+>35272
+TGCAGAGCTTCTTCCAACGTTGG
+>35273
+TCTGGTAGTAAGAAAAATGTAGCTT
+>35286
+TATGTATGGATATGTATATTTATGGGT
+>35317
+TTTTGGTTTGATCGTCAGGTGGTC
+>35512
+GACGATAATAGTGAATTTTGGACA
+>35566
+TTCGAATTCGCGCCATTTCACAATC
+>35948
+TGGCCTGTATACGCTTTCTGTTG
+>36009
+TTGCAAAAGTCATATCTTGAGG
+>36059
+TAAGTTACTATGGATCCATAAGGGTA
+>36100
+TGATATGGGACTTGTAGCTTTTTTAAA
+>36121
+TGTCGTGGGCTGTGCGTTTGAGAA
+>36235
+TGCTACCTTTAGCTGCAAGATTAACT
+>36238
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>36523
+TGCGAGTCCGAGCTTTGAACGTGGG
+>36607
+TCTCAATGTAATGTCTTCTTTTTGGA
+>36624
+CGTTCTTTAAAACCACCAATGGGA
+>36776
+TACTCACGACCATTGTCATTTCTCAAG
+>36825
+CGTTCGAGGTCCACTTTCTTAGCGGA
+>36850
+TCTGATGTCGGCGAGAAAGGAGTCTCA
+>36872
+TCAAGGCCAGCACACAGTAACATGGT
+>36968
+ACATTGGAAATACCGCGGGACCGC
+>36994
+TATACGTTTTTTGACCTCTTCTCTTTGA
+>37110
+TGATAATGTAAACAAAGATAAAGGG
+>37210
+TCAGATGAAAGACAAATTAGAATTAAA
+>37224
+GACGATAATAGTGAATTTTGGACA
+>37226
+TCTGAAGTTGCCGCACTAGAGATGG
+>37339
+TATACAGTCCACTATATCGTTGTTTAA
+>37395
+TACAATTTACGCTGAATTTAAATGAA
+>37460
+CACAAAGTAACGTGCACCACCATTT
+>37766
+TCTGTGCAACTTTGTACGCGAAGTCAGC
+>37836
+TCAAGGATTAATGTAGGGGGGGGG
+>38096
+TTTGGAGGTAGAAGTCTTAGTGGCCGC
+>38100
+GACAAAATATGCCCTTCAATTTAGA
+>38113
+TATGGTCCAGAATGTAGCCTCGGC
+>38213
+TAACTGTTTTTAGTTCAAAGTCTCGAA
+>38224
+TTTCAGTTTTTATTGTTAGTCACAGG
+>38263
+TAACTGCGAAGTCGATCAGGTCCGA
+>38266
+ATCCGGACGATTGACGAGGAGCCCATT
+>38271
+TTATGTCAGTGTCGAAGGCGATCGAA
+>38373
+TCTATAGCCTTGGCGTAGGAACTCGCA
+>38384
+GCTACAACTGCGACAGAAAATTCGGA
+>38468
+TATTATCGTCCGCTGCTAAACTGC
+>38491
+TGCATACTTCCGTTCTCTTTTCGGGA
+>38673
+TTGTGTATTGTGATTCTGATTCGTG
+>38745
+TCCCTGGCCCGCTAGACAGCAGGA
+>38784
+TGTGCTAATAATTAAAGTCGACTGA
+>38873
+TGTCATACTTTCGTCAAAA
+>39154
+TAATAGTCAGGGCGCGAATTTTTAAAA
+>39325
+TTAGGACTTATTGAACTTTACGGTA
+>39359
+TATGCAAATCAAGTGTGACCGTAGCT
+>39450
+TTAAACAACGATATAGTGGACAGTA
+>39453
+TCTGCCGGATTTTGATCCAATCAAGG
+>39819
+GTGGAATTGAAAAAGAACCAGACACA
+>40040
+TTTTTGGAACTACCTGAGTCGGTT
+>40183
+TAACACAAAGCAGTATGATTTAATAAT
+>40189
+CAGCAAGCTGAGATGTACATTAGTATA
+>40374
+TACGTTTTCTTGCAGATCAAAAA
+>40445
+CGTTCTTTAAAACCACCAATGGGA
+>40467
+TACGCAGATTCCTGGGAGTTACAGGA
+>40488
+TGATTTGGGCTTGCATACTTGTACT
+>40797
+TATATTCGTGTTCATGTGTGAACAGC
+>40831
+TCTAAGAACTTCTGAGGTGAAGGGC
+>40951
+TTCTTCGTAAGTCAAAATAGTGTCGCC
+>40988
+TATGATTGATTGCTTGAGAGT
+>42668
+TGCTGCAGAGCTTCTTCCAACGTTGG
+>42751
+CCAAAGTCTGGTTGTCAGAAAATGTGC
+>42777
+AAGGAAGGAACCAAAGAAGCACAAACG
+>42804
+TAATTCTAATTTGTCTCTCATCTGA
+>42830
+TAGTATACTTATTAAGTCATTTGA
+>42857
+AGAGTATTCATCTTGAGGCGTGTG
+>42886
+TGAACAACGATTTATGTATATAAGAA
+>42899
+TACGATAATAGTGAATTTTGGACA
+>42936
+TTTGACTAAAAGTCGCTTGTTTTGGA
+>43050
+AATACATAACTCTGGACACAGGAGA
+>43219
+TACTTTCGTCAAAATGTTCAGGAGCT
+>43242
+TGACATGTCTATTTCCATGGGTTCGGA
+>43268
+GTGTATAAAAAAATTTATTGTTGAGCA
+>43299
+TCTGGATGATGGCTGATGCTCGTTG
+>43491
+TCTGATGACAATGAATTTTTTAGACA
+>43520
+TATTGTTGTAATTGCTGCCTCGGTTG
+>43654
+ATATGAACAAAGCAAAGACACTAGAA
+>43677
+CACGATTCATAACCCTCAGCTGAAG
+>43776
+GCTCCTGGCAACTCTGTGATGGAC
+>43903
+TATTTTGATGTTTAATGAA
+>43990
+TTTCTACTTCGTATTATTTTTATGA
+>44062
+AAACATAATAATTGATGGCGGAAGA
+>44071
+CACAGACGCAGTGGAAACCGCAGA
+>44078
+CGTGCGTCCGAGCAAAAGGTGGT
+>44187
+TTATGTAAGAATATTTGTCATTAGA
+>44239
+TATCAATGTTGACCGTAATACTCAA
+>44253
+TGGATATTGAATGTTTTTGATTTGC
+>44279
+TGTGATTTTCCCAATTTATATTAATACA
+>44374
+TAGTCGGAGTTGATGAGCTGCC
+>44412
+TCCGAATTTTCTGTCGCAGTTGTAGCT
+>44423
+TCGGCTTGGGTTTAGAAATTAAAAT
+>44558
+ACAAATCATAAATTTGGTGGGA
+>44637
+TTTTTTATCAATTGGCACCATGCGAA
+>44642
+TACTGTGTGCTGGCCTTGATGAAAGT
+>44688
+TAGATGTCTGCACGAATAACGGA
+>44945
+TGCTTCCGAGCAATCTACGTTGGTAAAA
+>45054
+TCTCATCTGACAATTTTTTAAAAGCGA
+>45088
+TCTGAAGCAGCGCTCACGGCAGAATGC
+>45203
+TGCCGGATTTTGATCCAATCAAGGGA
+>45263
+TATTGATTTTCCTATTTAGTTGAACA
+>45274
+AAAAGTCTGGATATTGTAGGATAGGA
+>45358
+CAACGCTGGACCTTGGACTCGAGGGC
+>45396
+CATCTTGTTATTCTAGTGTCTTTGGTA
+>45641
+GAACTAAGATCAGTCGACTGTAATTAT
+>45774
+CGACGAACTAGCAGCTCTGGTGT
+>45911
+CAGATATTAGTGTTTTTCAAGCAGC
+>45997
+TTAACCAGTCGGCGTTGTTTAAGTAGC
+>46031
+AAAGCGTCTACTTGAACAATGAGA
+>46054
+TTAGATCGTATTACTTGGGTGCTGG
+>46199
+TAAGCGTTAGGTCGTATTACTTGGGC
+>46255
+TAAGATTGAAAATTACTGTGGAGTAAT
+>46366
+TAAGGAATTGTCGGCCATTTAATGTGA
+>46387
+CAGAGCTTCTTCCAACGTTGGCAAG
+>46701
+TTGTATCTTTTTGCTTTTTATATT
+>46705
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>46783
+TCTCATCTGACAATTTTTTAAAAGCGA
+>46836
+TCATAAGGACAGACGGACAT
+>46892
+AAACATAATAATTGATGGCGGAAGA
+>47244
+TCTCTGTCCGCTCGCTTACGATGAGA
+>47290
+TTTCAGGAATGGGGTCGTCCCACTA
+>47348
+TGCAAAAGTAATATCAAAGACACTA
+>47359
+TTTAAAATTATAATAGTCAGGG
+>47427
+CATCTTGTTATTCTAGTGTCTTTGGT
+>47654
+TTAAACACTGAATTCGGTTTCGAAA
+>47656
+TTTCTGAAGAATCCTGTAACTCCC
+>47785
+TGTAGATGAGCGGCAAATGTGG
+>47827
+TTGATCAAGGTAGAGTTGTCGCGC
+>48184
+TTCAAGGATTAATGTAGGGGGGG
+>48576
+TTAACCCGGAGACTTGGGTGTGGGT
+>48764
+TGCAAAAGTAATATCAAAGACAATAGA
+>48782
+TGACAATGTAGTGAACGCCAGTGT
+>48893
+TATAAATGCCGTCTGATATTATTAAA
+>49017
+ACCGGATGTCATATCCAGCGTCGTGAA
+>49381
+TACAATGTAAATTCGTTTCTTCGATCA
+>49456
+TTTTGGTTTGATCGTCAGGTGGACGC
+>49484
+TCTGCTATCATTGACTCGATCATTGA
+>49569
+TCGATTGTATGATCAGTGGAAGTGGC
+>49602
+GAAATTGGCCAACATTAATTCGGAA
+>49628
+TACTTTCGTCAAAATGTTCAGGAGC
+>49715
+TTTGTCCGGGTGCTTCGAAAGAACTCT
+>49778
+CAATAGCGTCGCTGAGTAACAGTG
b
diff -r 3519c2de7fac -r 6bf9de09aa74 test-data/unmatched_fastq.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_fastq.fa Mon Apr 11 00:27:41 2022 +0000
b
@@ -0,0 +1,78 @@
+>HWI-176
+CGAGACTTTGAACTAAAAACAGT
+>HWI-277
+TCAGATGAGAGACAAATTAGAAT
+>HWI-458
+TTTCGAGGTTCCGAATTTTCTGTC
+>HWI-778
+TAGGATGTGCTCTGCGGTTTCCACT
+>HWI-895
+TTTAATTGGCGCAGTCGGTAGGATC
+>HWI-967
+GAGAATGACGGAACTGTAATA
+>HWI-974
+TTGCGAGGTCGTCCTGGGAGACCAG
+>HWI-1009
+AACGTTGGAAGAAGCTCTGCAGC
+>HWI-1011
+TTTAATGTTAACACGGACATTGACC
+>HWI-1014
+TCTCCGACTTGCTGAGCTGTTTCCGCCG
+>HWI-1106
+CATCTTGTTATTCTATTGTCTTTGGTC
+>HWI-1202
+AAAGTAAATTCTGATGACTTCAAAAT
+>HWI-1227
+TACCATGTAAATTCGTTTCTTCG
+>HWI-1262
+TATTAGCTCAAAGAACAGCTCGT
+>HWI-1297
+TTGCAGCAATATCGTCAACATCCTC
+>HWI-1520
+TAATATCGAAGCCGAACTGAGAACA
+>HWI-1584
+TAAAGTTATGACAAGAATTGATGTT
+>HWI-1590
+TCATATGCCAATTTCGTGTTTCGATG
+>HWI-1609
+TAAGATTGAAAATTACTGTGGAGTAAT
+>HWI-1745
+TTCATCCTGCTGCCGGAGC
+>HWI-1816
+CTTCCGTTATTCCTGCAGACA
+>HWI-1845
+TAATATCGAACCCGAACTGAGAA
+>HWI-1852
+TGGACACCGTCGTTCCACTTGAACT
+>HWI-1887
+TCAAGGTAGGGTTGTCGCGTGTATTT
+>HWI-1892
+TATTATCGTCCGCTGCTAAACTG
+>HWI-1936
+TAGCTGCAACTTCTGAAGTTATGGC
+>HWI-1997
+CAAAACAAGAATTTTTCGCATGGTGCC
+>HWI-2055
+TAAACTAATTCTGTCGGTTTTCTGT
+>HWI-2075
+AGACTTTGAACTAAAAACAGTTACCT
+>HWI-2244
+TAACTTCTTCTATTTTCGTGCGGGA
+>HWI-2253
+TAACTGCCCTCGATCACGCCTTCCCGA
+>HWI-2277
+TGACGACGACAGTATCGCAAGACGGT
+>HWI-2409
+GTCAGATGAGAGACAAATTAGAAT
+>HWI-2418
+TAATATCGAACCCGAACTGAGAAC
+>HWI-2490
+TTTATGACAACCCGATCGGACCTCACTC
+>HWI-2557
+TGTCCGGAGATCAAGAAGTGTTGGCAC
+>HWI-2670
+TCAGATGAGAGACAAATTAGAATTAAA
+>HWI-2719
+TATTATCGTCCGCTGCTAAACTGCTG
+>HWI-2834
+CCTAGGAGAAGGAAGTCAAGAAGGCC