Mercurial > repos > artbio > sr_bowtie_dataset_annotation
changeset 9:6bf9de09aa74 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit c8f13ba73552ccf7db7c22859b7fdc6ad121cdf0"
author | artbio |
---|---|
date | Mon, 11 Apr 2022 00:27:41 +0000 |
parents | 3519c2de7fac |
children | fd4a60fc3fca |
files | sr_bowtie_dataset_annotation.xml test-data/unmatched_2.fa test-data/unmatched_3.fa test-data/unmatched_4.fa test-data/unmatched_5.fa test-data/unmatched_fastq.fa |
diffstat | 6 files changed, 1300 insertions(+), 269 deletions(-) [+] |
line wrap: on
line diff
--- a/sr_bowtie_dataset_annotation.xml Sat Apr 09 22:45:21 2022 +0000 +++ b/sr_bowtie_dataset_annotation.xml Mon Apr 11 00:27:41 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.6"> +<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.7"> <description>by iterative alignments with sRbowtie</description> <requirements> <requirement type="package" version="1.3.1">bowtie</requirement> @@ -24,7 +24,9 @@ #elif $input[0].is_of_type('fastq'): #set format = "-q" #end if - + + mkdir unmatched_dir && + #for $file in $input: #set sample=$file.element_identifier bowtie -p \${GALAXY_SLOTS:-4} @@ -58,12 +60,15 @@ #end for remaining=\$(( \$(wc -l < class_unmatched.fa)/2)) && echo -e "$sample\tNot classified\t\${remaining}\t\${genome_aligned}" >> $output && + cp class_unmatched.fa unmatched_dir/${sample}_unmatched.fasta && + #if $format == '-q': + mv unmatched_dir/${sample}_unmatched.fasta unmatched_dir/${sample}_unmatched.fastq && + sed -n '1~4s/^@/>/p;2~4p' unmatched_dir/${sample}_unmatched.fastq > unmatched_dir/${sample}_unmatched.fasta && + rm unmatched_dir/${sample}_unmatched.fastq && + #end if #end for + ls -la unmatched_dir && Rscript $__tool_directory__/barplot.r --input $output --barplot $barplot - #if $format == '-q': - && mv class_unmatched.fa class_unmatched.fastq - && sed -n '1~4s/^@/>/p;2~4p' class_unmatched.fastq > class_unmatched.fa - #end if ]]></command> <inputs> <param name="input" type="data" multiple="True" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/> @@ -96,7 +101,9 @@ <!-- End of other bowtie index selections --> </inputs> <outputs> - <data format="fasta" name="unmatched" label="Annotate smRNAs: Unmatched reads" from_work_dir="class_unmatched.fa" /> + <collection name="unmatched" type="list" format="fasta" label="Annotate smRNAs: Unmatched reads"> + <discover_datasets pattern="__name_and_ext__" directory="unmatched_dir" /> + </collection> <data format="tabular" name="output" label="Cascade Annotation Analysis"> <actions> <action name="column_names" type="metadata" default="Sample,Reference Index,Number of reads, Total reads" /> @@ -106,6 +113,22 @@ </outputs> <tests> <test> + <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" /> + <param name="genomeSource" value="history" /> + <param name="ownFile" value ="2L-tail.fa" ftype="fasta" /> + <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" /> + <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" /> + <output name="output" ftype="tabular" file="multisample5_output.tab" /> + <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" /> + <output_collection name="unmatched" type="list" count="5"> + <element name="sample5.fa_unmatched" file="unmatched_5.fa" ftype="fasta"/> + <element name="sample4.fa_unmatched" file="unmatched_4.fa" ftype="fasta"/> + <element name="sample3.fa_unmatched" file="unmatched_3.fa" ftype="fasta"/> + <element name="sample2.fa_unmatched" file="unmatched_2.fa" ftype="fasta"/> + <element name="sample1.fa_unmatched" file="unmatched_1.fa" ftype="fasta"/> + </output_collection> + </test> + <test> <param name="input" value ="sample1.fa" ftype="fasta" /> <param name="genomeSource" value="history" /> <param name="ownFile" value ="2L-tail.fa" ftype="fasta" /> @@ -113,7 +136,9 @@ <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" /> <output name="output" ftype="tabular" file="sample1_output.tab" /> <output name="barplot" ftype="pdf" file="sample1_output.pdf" compare="sim_size" delta="500"/> - <output name="unmatched" ftype="fasta" file="unmatched_1.fa" /> + <output_collection name="unmatched" type="list"> + <element name="sample1.fa_unmatched" file="unmatched_1.fa" ftype="fasta"/> + </output_collection> </test> <test> <param name="input" value ="sample.fastq" ftype="fastq" /> @@ -123,17 +148,9 @@ <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" /> <output name="output" ftype="tabular" file="sample_output.tab" /> <output name="barplot" ftype="pdf" file="sample_output.pdf" compare="sim_size" delta="500"/> - <output name="unmatched" ftype="fasta" file="unmatched_2.fa" /> - </test> - <test> - <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" /> - <param name="genomeSource" value="history" /> - <param name="ownFile" value ="2L-tail.fa" ftype="fasta" /> - <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" /> - <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" /> - <output name="output" ftype="tabular" file="multisample5_output.tab" /> - <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" /> - <output name="unmatched" ftype="fasta" file="unmatched_3.fa" /> + <output_collection name="unmatched" type="list"> + <element name="sample.fastq_unmatched" file="unmatched_fastq.fa" ftype="fasta"/> + </output_collection> </test> </tests> <help>
--- a/test-data/unmatched_2.fa Sat Apr 09 22:45:21 2022 +0000 +++ b/test-data/unmatched_2.fa Mon Apr 11 00:27:41 2022 +0000 @@ -1,78 +1,444 @@ ->HWI-176 -CGAGACTTTGAACTAAAAACAGT ->HWI-277 +>30787 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>30836 +TAAATATTTTTTTTGAAACA +>30977 +CGACTGTAATTATTAGCACAATACT +>30985 +GACGATATTGCTGCAATAGACCTTGA +>31000 TCAGATGAGAGACAAATTAGAAT ->HWI-458 -TTTCGAGGTTCCGAATTTTCTGTC ->HWI-778 -TAGGATGTGCTCTGCGGTTTCCACT ->HWI-895 -TTTAATTGGCGCAGTCGGTAGGATC ->HWI-967 -GAGAATGACGGAACTGTAATA ->HWI-974 -TTGCGAGGTCGTCCTGGGAGACCAG ->HWI-1009 -AACGTTGGAAGAAGCTCTGCAGC ->HWI-1011 -TTTAATGTTAACACGGACATTGACC ->HWI-1014 -TCTCCGACTTGCTGAGCTGTTTCCGCCG ->HWI-1106 -CATCTTGTTATTCTATTGTCTTTGGTC ->HWI-1202 -AAAGTAAATTCTGATGACTTCAAAAT ->HWI-1227 -TACCATGTAAATTCGTTTCTTCG ->HWI-1262 -TATTAGCTCAAAGAACAGCTCGT ->HWI-1297 -TTGCAGCAATATCGTCAACATCCTC ->HWI-1520 -TAATATCGAAGCCGAACTGAGAACA ->HWI-1584 -TAAAGTTATGACAAGAATTGATGTT ->HWI-1590 -TCATATGCCAATTTCGTGTTTCGATG ->HWI-1609 +>31256 +TAATTCGGAATGCCTGCTCTACT +>31417 +TCAATGATCGCTGTGCTCAGTAGGA +>31506 +TTTGTCTGACGTTAAAAAATATA +>31567 +TCCACATTAGGAGGATTATTAGACAAC +>31790 +AAACATAATAATTGATGGCGGAAGA +>31872 +AAGGTAATCATAGAGCACCACGGTT +>32157 +TTTCTGTGAATTCACATGCTGATGA +>32192 +TTTCTCATGTATAAAATGCTCTGATGG +>32223 +TATCTTGTTATTCTAGTGTCTTTGGTT +>32338 +TGTGGGACTCGAGCCAAAATGGCAACCT +>32497 +TGCGGTTGGACAATTTTTTTTTTATA +>32506 +TTGTTGTTTGGAGGAAGTTCCTTT +>32510 +TCTTCCGCCATCAATTATTATGTTTT +>32522 +TACTTGACTTTTCTATAGAATCTGGT +>32540 +TATCGTCAACATCCTCGAACGATCGAGA +>32626 +TTGATCAAGGTAGGGTTGTCGC +>32646 +TCTGTTAAACACCCCTGAATCGTGGAT +>32657 +TTTGGACATTTTGCAGGTGATACAAT +>32682 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>32716 +TATCTATAGTTCCGATTGGCCATCTC +>32885 +GAAAGTGGGTATCTGTATTTTAGGC +>32967 +TAAAGATACCATCTAACCTCCTTGGA +>33075 +ACAGATATTAGTGTTTTTCAAGCAGC +>33147 +ATCGCAATTATGGCATAACAGATTCGGA +>33163 +TTCAGCGTCGCTTGATTGAATAGAT +>33231 +TAAGAACTTCTGAGGTGAAGGGC +>33242 +TTGGGTTTAGAAATTAAAATTAAGGC +>33244 +TATAGAAAGTTACGAATATATTAGGA +>33248 +TTTTTGATCAATTGGCACCGTGCGAA +>33296 +TGCAGAGCTTCTTCCAACGTTGGCAAG +>33318 +TAGATGTCTGCAGGAATAACGGA +>33338 +TCGACTATTAATGGCTGTTAGAATT +>33475 +CAAACTTATCGACCATCTCCTCAAACG +>33601 +TTACCCTTTTTCCGGAGCGTTTGTGC +>33611 +AATGATCGCTGTGCTCAGTATGACGG +>33662 +TTTGATTCATTACAATTTACGCTGAA +>33737 +CAGATATTAGTGTTTTTCAAGCAGC +>33747 +TACATGTAAAGCAGCTGTGTGTGC +>33815 +AGTTTTTGGAATCACTTGA +>34032 +TGCGGACGTGTGCTCGCTGCGTGA +>34200 +TAATATATGTATAATCTGCTTGGTG +>34240 +TCTTTACGCCATATAAATCATTTCGA +>34259 +ACAAATCATAAATTTGATGGGACGA +>34268 +TCTTGTGGACTTCACTCTAG +>34297 +TAAGTAAATAGTCCCCGCCTTATTGAGG +>34337 +TGGATAATGAATGTTTTTGATTTGC +>34569 +TCTAGGTTCTTCTGAAATCGTGGGA +>34643 +TCGTGTAGACCGGATAAGATTTTTT +>34697 +TCTAATACTGTGAAAGGGTGGGG +>34808 +GTAGGATGTGCTCTGCGGTTTCCAC +>34892 +TCTGTCGCAGTTGTAGCTTGCAATA +>34896 +TGTAGTTGCCACTTATGCTGTCCA +>35151 +TAGCAATGTCCGTCTGTCCGTATGA +>35196 +TGTAAATGGTCAGCGAAAGCAAAGG +>35211 +CAGATATTAGTGTTTTTCAAGCAGCGG +>35246 +CCTCAGAGAACGTCAGACCGCG +>35272 +TGCAGAGCTTCTTCCAACGTTGG +>35273 +TCTGGTAGTAAGAAAAATGTAGCTT +>35286 +TATGTATGGATATGTATATTTATGGGT +>35317 +TTTTGGTTTGATCGTCAGGTGGTC +>35512 +GACGATAATAGTGAATTTTGGACA +>35566 +TTCGAATTCGCGCCATTTCACAATC +>35948 +TGGCCTGTATACGCTTTCTGTTG +>36009 +TTGCAAAAGTCATATCTTGAGG +>36059 +TAAGTTACTATGGATCCATAAGGGTA +>36100 +TGATATGGGACTTGTAGCTTTTTTAAA +>36121 +TGTCGTGGGCTGTGCGTTTGAGAA +>36235 +TGCTACCTTTAGCTGCAAGATTAACT +>36238 +CAGATATTAGTGTTTTTCAAGCAGCGG +>36523 +TGCGAGTCCGAGCTTTGAACGTGGG +>36607 +TCTCAATGTAATGTCTTCTTTTTGGA +>36624 +CGTTCTTTAAAACCACCAATGGGA +>36776 +TACTCACGACCATTGTCATTTCTCAAG +>36825 +CGTTCGAGGTCCACTTTCTTAGCGGA +>36850 +TCTGATGTCGGCGAGAAAGGAGTCTCA +>36872 +TCAAGGCCAGCACACAGTAACATGGT +>36968 +ACATTGGAAATACCGCGGGACCGC +>36994 +TATACGTTTTTTGACCTCTTCTCTTTGA +>37110 +TGATAATGTAAACAAAGATAAAGGG +>37210 +TCAGATGAAAGACAAATTAGAATTAAA +>37224 +GACGATAATAGTGAATTTTGGACA +>37226 +TCTGAAGTTGCCGCACTAGAGATGG +>37339 +TATACAGTCCACTATATCGTTGTTTAA +>37395 +TACAATTTACGCTGAATTTAAATGAA +>37460 +CACAAAGTAACGTGCACCACCATTT +>37766 +TCTGTGCAACTTTGTACGCGAAGTCAGC +>37836 +TCAAGGATTAATGTAGGGGGGGGG +>38096 +TTTGGAGGTAGAAGTCTTAGTGGCCGC +>38100 +GACAAAATATGCCCTTCAATTTAGA +>38113 +TATGGTCCAGAATGTAGCCTCGGC +>38213 +TAACTGTTTTTAGTTCAAAGTCTCGAA +>38224 +TTTCAGTTTTTATTGTTAGTCACAGG +>38263 +TAACTGCGAAGTCGATCAGGTCCGA +>38266 +ATCCGGACGATTGACGAGGAGCCCATT +>38271 +TTATGTCAGTGTCGAAGGCGATCGAA +>38373 +TCTATAGCCTTGGCGTAGGAACTCGCA +>38384 +GCTACAACTGCGACAGAAAATTCGGA +>38468 +TATTATCGTCCGCTGCTAAACTGC +>38491 +TGCATACTTCCGTTCTCTTTTCGGGA +>38673 +TTGTGTATTGTGATTCTGATTCGTG +>38745 +TCCCTGGCCCGCTAGACAGCAGGA +>38784 +TGTGCTAATAATTAAAGTCGACTGA +>38873 +TGTCATACTTTCGTCAAAA +>39154 +TAATAGTCAGGGCGCGAATTTTTAAAA +>39325 +TTAGGACTTATTGAACTTTACGGTA +>39359 +TATGCAAATCAAGTGTGACCGTAGCT +>39450 +TTAAACAACGATATAGTGGACAGTA +>39453 +TCTGCCGGATTTTGATCCAATCAAGG +>39819 +GTGGAATTGAAAAAGAACCAGACACA +>40040 +TTTTTGGAACTACCTGAGTCGGTT +>40183 +TAACACAAAGCAGTATGATTTAATAAT +>40189 +CAGCAAGCTGAGATGTACATTAGTATA +>40374 +TACGTTTTCTTGCAGATCAAAAA +>40445 +CGTTCTTTAAAACCACCAATGGGA +>40467 +TACGCAGATTCCTGGGAGTTACAGGA +>40488 +TGATTTGGGCTTGCATACTTGTACT +>40797 +TATATTCGTGTTCATGTGTGAACAGC +>40831 +TCTAAGAACTTCTGAGGTGAAGGGC +>40951 +TTCTTCGTAAGTCAAAATAGTGTCGCC +>40988 +TATGATTGATTGCTTGAGAGT +>40998 +TGATAGAGCTGCATTTGAATTAACGG +>41061 +CTCTTTCCGCTCACTCCCGCTGAGA +>41076 +TGCATTAAGAAGATTTAGGATCC +>41140 +TCAAGGATTAATGTAGGGGGGG +>41143 +CAATAGCGTCGCTGAGTAACAGTG +>41162 +TACGGATTGCAGCGGCTAG +>41183 +TTTTTTGGCACACGATTTTTTGGACGT +>41227 +TAATATATGTATACTCTGCTTGGTG +>41265 +TTTATGATTTTTGGTAATA +>41365 +TAGGAGGGTTCCACAACTATTTCGGGG +>41837 +TAGTTTGACACTGTTTGGAGACGTGG +>42053 +TCTGTGGTCGAATCGAAGGAGTGC +>42113 +TAATAGATCGCTCACCTGTTCCTGG +>42392 +CGGCATCGGAAAACTCCCAGCGGGGC +>42504 +TAACGTTATTATTATTTGAAAATAGAA +>42566 +TTTGTTGGGTAGGAACTTTACTGC +>42668 +TGCTGCAGAGCTTCTTCCAACGTTGG +>42751 +CCAAAGTCTGGTTGTCAGAAAATGTGC +>42777 +AAGGAAGGAACCAAAGAAGCACAAACG +>42804 +TAATTCTAATTTGTCTCTCATCTGA +>42830 +TAGTATACTTATTAAGTCATTTGA +>42857 +AGAGTATTCATCTTGAGGCGTGTG +>42886 +TGAACAACGATTTATGTATATAAGAA +>42899 +TACGATAATAGTGAATTTTGGACA +>42936 +TTTGACTAAAAGTCGCTTGTTTTGGA +>43050 +AATACATAACTCTGGACACAGGAGA +>43219 +TACTTTCGTCAAAATGTTCAGGAGCT +>43242 +TGACATGTCTATTTCCATGGGTTCGGA +>43268 +GTGTATAAAAAAATTTATTGTTGAGCA +>43299 +TCTGGATGATGGCTGATGCTCGTTG +>43491 +TCTGATGACAATGAATTTTTTAGACA +>43520 +TATTGTTGTAATTGCTGCCTCGGTTG +>43654 +ATATGAACAAAGCAAAGACACTAGAA +>43677 +CACGATTCATAACCCTCAGCTGAAG +>43776 +GCTCCTGGCAACTCTGTGATGGAC +>43903 +TATTTTGATGTTTAATGAA +>43990 +TTTCTACTTCGTATTATTTTTATGA +>44062 +AAACATAATAATTGATGGCGGAAGA +>44071 +CACAGACGCAGTGGAAACCGCAGA +>44078 +CGTGCGTCCGAGCAAAAGGTGGT +>44187 +TTATGTAAGAATATTTGTCATTAGA +>44239 +TATCAATGTTGACCGTAATACTCAA +>44253 +TGGATATTGAATGTTTTTGATTTGC +>44279 +TGTGATTTTCCCAATTTATATTAATACA +>44374 +TAGTCGGAGTTGATGAGCTGCC +>44412 +TCCGAATTTTCTGTCGCAGTTGTAGCT +>44423 +TCGGCTTGGGTTTAGAAATTAAAAT +>44558 +ACAAATCATAAATTTGGTGGGA +>44637 +TTTTTTATCAATTGGCACCATGCGAA +>44642 +TACTGTGTGCTGGCCTTGATGAAAGT +>44688 +TAGATGTCTGCACGAATAACGGA +>44945 +TGCTTCCGAGCAATCTACGTTGGTAAAA +>45054 +TCTCATCTGACAATTTTTTAAAAGCGA +>45088 +TCTGAAGCAGCGCTCACGGCAGAATGC +>45203 +TGCCGGATTTTGATCCAATCAAGGGA +>45263 +TATTGATTTTCCTATTTAGTTGAACA +>45274 +AAAAGTCTGGATATTGTAGGATAGGA +>45358 +CAACGCTGGACCTTGGACTCGAGGGC +>45396 +CATCTTGTTATTCTAGTGTCTTTGGTA +>45641 +GAACTAAGATCAGTCGACTGTAATTAT +>45774 +CGACGAACTAGCAGCTCTGGTGT +>45911 +CAGATATTAGTGTTTTTCAAGCAGC +>45997 +TTAACCAGTCGGCGTTGTTTAAGTAGC +>46031 +AAAGCGTCTACTTGAACAATGAGA +>46054 +TTAGATCGTATTACTTGGGTGCTGG +>46199 +TAAGCGTTAGGTCGTATTACTTGGGC +>46255 TAAGATTGAAAATTACTGTGGAGTAAT ->HWI-1745 -TTCATCCTGCTGCCGGAGC ->HWI-1816 -CTTCCGTTATTCCTGCAGACA ->HWI-1845 -TAATATCGAACCCGAACTGAGAA ->HWI-1852 -TGGACACCGTCGTTCCACTTGAACT ->HWI-1887 -TCAAGGTAGGGTTGTCGCGTGTATTT ->HWI-1892 -TATTATCGTCCGCTGCTAAACTG ->HWI-1936 -TAGCTGCAACTTCTGAAGTTATGGC ->HWI-1997 -CAAAACAAGAATTTTTCGCATGGTGCC ->HWI-2055 -TAAACTAATTCTGTCGGTTTTCTGT ->HWI-2075 -AGACTTTGAACTAAAAACAGTTACCT ->HWI-2244 -TAACTTCTTCTATTTTCGTGCGGGA ->HWI-2253 -TAACTGCCCTCGATCACGCCTTCCCGA ->HWI-2277 -TGACGACGACAGTATCGCAAGACGGT ->HWI-2409 -GTCAGATGAGAGACAAATTAGAAT ->HWI-2418 -TAATATCGAACCCGAACTGAGAAC ->HWI-2490 -TTTATGACAACCCGATCGGACCTCACTC ->HWI-2557 -TGTCCGGAGATCAAGAAGTGTTGGCAC ->HWI-2670 -TCAGATGAGAGACAAATTAGAATTAAA ->HWI-2719 -TATTATCGTCCGCTGCTAAACTGCTG ->HWI-2834 -CCTAGGAGAAGGAAGTCAAGAAGGCC +>46366 +TAAGGAATTGTCGGCCATTTAATGTGA +>46387 +CAGAGCTTCTTCCAACGTTGGCAAG +>46701 +TTGTATCTTTTTGCTTTTTATATT +>46705 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>46783 +TCTCATCTGACAATTTTTTAAAAGCGA +>46836 +TCATAAGGACAGACGGACAT +>46892 +AAACATAATAATTGATGGCGGAAGA +>47244 +TCTCTGTCCGCTCGCTTACGATGAGA +>47290 +TTTCAGGAATGGGGTCGTCCCACTA +>47348 +TGCAAAAGTAATATCAAAGACACTA +>47359 +TTTAAAATTATAATAGTCAGGG +>47427 +CATCTTGTTATTCTAGTGTCTTTGGT +>47654 +TTAAACACTGAATTCGGTTTCGAAA +>47656 +TTTCTGAAGAATCCTGTAACTCCC +>47785 +TGTAGATGAGCGGCAAATGTGG +>47827 +TTGATCAAGGTAGAGTTGTCGCGC +>48184 +TTCAAGGATTAATGTAGGGGGGG +>48576 +TTAACCCGGAGACTTGGGTGTGGGT +>48764 +TGCAAAAGTAATATCAAAGACAATAGA +>48782 +TGACAATGTAGTGAACGCCAGTGT +>48893 +TATAAATGCCGTCTGATATTATTAAA +>49017 +ACCGGATGTCATATCCAGCGTCGTGAA +>49381 +TACAATGTAAATTCGTTTCTTCGATCA +>49456 +TTTTGGTTTGATCGTCAGGTGGACGC +>49484 +TCTGCTATCATTGACTCGATCATTGA +>49569 +TCGATTGTATGATCAGTGGAAGTGGC +>49602 +GAAATTGGCCAACATTAATTCGGAA +>49628 +TACTTTCGTCAAAATGTTCAGGAGC +>49715 +TTTGTCCGGGTGCTTCGAAAGAACTCT +>49778 +CAATAGCGTCGCTGAGTAACAGTG
--- a/test-data/unmatched_3.fa Sat Apr 09 22:45:21 2022 +0000 +++ b/test-data/unmatched_3.fa Mon Apr 11 00:27:41 2022 +0000 @@ -1,177 +1,3 @@ ->25207 -TCGCACTAGCTTCGGTCGTCTCGGA ->25229 -TCTGTTCGAGGTTCTTTATTTGAA ->25283 -TAAGGGAGCGAGATCGTTCGATGCA ->25363 -TAGTGTTGGGTGTAGGCCAGGGAG ->25376 -TCCGCCGACAATGTTGCAACATGA ->25435 -TTTCTTCAAGCTGCGCGTTTTTCGG ->25593 -TCGAAAACTTCTTCGCACTCTTCGTT ->25660 -TCTCGCTGCGTGAACGATGAAGGC ->25968 -TTTGCTAGGACTGCTGCATAAG ->26220 -GCCCGTGTCGATGAATGCTTCAAACT ->26261 -TATTATCGTCCGCTGCTAAACTGC ->26543 -CTTATGTATAAAAAGCTCTGATGGA ->26610 -TCCTTAAGTTCGTTGATTTGGGCT ->26675 -CAATATCGTCAACATCCTCGAACGAT ->26873 -TTGGGAGCTGAATCCCGTTACGGTA ->26978 -CGCGCATTAGCTCAAATCTAGTTGGA ->27220 -TGTTTATTTGTCAAGTTTAGATAATA ->27397 -TAGTTCACAGCTGTATGTCCAGATGGGT ->27869 -TATGGTCCAGAATGTAGCCTCGGC ->27878 -TATTAATCGGGCCACAAATATCGGTA ->28035 -GGTATTCTTTGCGAGGTCGTCCTGG ->28154 -AAAGACGAGAACGCGTATATGTGTGC ->28319 -TGACCAACCTTAAAAGATCGGGGT ->28387 -TATTACTATTTCTAAGCTTTGTTTAAA ->28594 -TTTTGAGGTTGGTCAAGAAGTTGTT ->28608 -TAAGATTGAAAATTACTGTGGAGT ->28668 -TACAGATTCTAGAGACAAAGACGC ->28674 -TAAGTACATGCGCCCAGCCGCCGTGA ->28836 -TCTGGTTAAGGTCGGAATACTCGTCT ->29056 -TGCTTTACATACCCTTTGGTGCCC ->29323 -TTTTGCTGCAGAGCTTCTTCCAACGT ->29342 -TATGTATGGATATGTATATTTATGGT ->29387 -TGATAATCGACCTCTTCCATCGTTGT ->29423 -TCTAAGAACTTCTGAGGTGAAGG ->29462 -TGCATTTCAATCGGAAGAGTACTCTG ->29492 -TAAAACACAAATCTCGACATACAGA ->29703 -GTACAGGTTCTGATGACAATG ->29785 -TCAGATGAAAGACAAATTAGAATT ->30053 -TAAGAACTTCTGAGATGAAGGGC ->30096 -TGGATATTGAATGTTTTTGATTTGC ->30136 -TGTAGTCGTCGTATGTCCGGA ->30272 -TGCATTCGTGGATTCGCATTCGAGA ->30497 -TGACAAAATATGCCCTTCACCTCAGA ->30787 -TAACTGTTTTTAGTTCAAAGTCTCGGA ->30836 -TAAATATTTTTTTTGAAACA ->30977 -CGACTGTAATTATTAGCACAATACT ->30985 -GACGATATTGCTGCAATAGACCTTGA ->31000 -TCAGATGAGAGACAAATTAGAAT ->31256 -TAATTCGGAATGCCTGCTCTACT ->31417 -TCAATGATCGCTGTGCTCAGTAGGA ->31506 -TTTGTCTGACGTTAAAAAATATA ->31567 -TCCACATTAGGAGGATTATTAGACAAC ->31790 -AAACATAATAATTGATGGCGGAAGA ->31872 -AAGGTAATCATAGAGCACCACGGTT ->32157 -TTTCTGTGAATTCACATGCTGATGA ->32192 -TTTCTCATGTATAAAATGCTCTGATGG ->32223 -TATCTTGTTATTCTAGTGTCTTTGGTT ->32338 -TGTGGGACTCGAGCCAAAATGGCAACCT ->32497 -TGCGGTTGGACAATTTTTTTTTTATA ->32506 -TTGTTGTTTGGAGGAAGTTCCTTT ->32510 -TCTTCCGCCATCAATTATTATGTTTT ->32522 -TACTTGACTTTTCTATAGAATCTGGT ->32540 -TATCGTCAACATCCTCGAACGATCGAGA ->32626 -TTGATCAAGGTAGGGTTGTCGC ->32646 -TCTGTTAAACACCCCTGAATCGTGGAT ->32657 -TTTGGACATTTTGCAGGTGATACAAT ->32682 -TAACTGTTTTTAGTTCAAAGTCTCGGA ->32716 -TATCTATAGTTCCGATTGGCCATCTC ->32885 -GAAAGTGGGTATCTGTATTTTAGGC ->32967 -TAAAGATACCATCTAACCTCCTTGGA ->33075 -ACAGATATTAGTGTTTTTCAAGCAGC ->33147 -ATCGCAATTATGGCATAACAGATTCGGA ->33163 -TTCAGCGTCGCTTGATTGAATAGAT ->33231 -TAAGAACTTCTGAGGTGAAGGGC ->33242 -TTGGGTTTAGAAATTAAAATTAAGGC ->33244 -TATAGAAAGTTACGAATATATTAGGA ->33248 -TTTTTGATCAATTGGCACCGTGCGAA ->33296 -TGCAGAGCTTCTTCCAACGTTGGCAAG ->33318 -TAGATGTCTGCAGGAATAACGGA ->33338 -TCGACTATTAATGGCTGTTAGAATT ->33475 -CAAACTTATCGACCATCTCCTCAAACG ->33601 -TTACCCTTTTTCCGGAGCGTTTGTGC ->33611 -AATGATCGCTGTGCTCAGTATGACGG ->33662 -TTTGATTCATTACAATTTACGCTGAA ->33737 -CAGATATTAGTGTTTTTCAAGCAGC ->33747 -TACATGTAAAGCAGCTGTGTGTGC ->33815 -AGTTTTTGGAATCACTTGA >34032 TGCGGACGTGTGCTCGCTGCGTGA >34200
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unmatched_4.fa Mon Apr 11 00:27:41 2022 +0000 @@ -0,0 +1,332 @@ +>30787 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>30836 +TAAATATTTTTTTTGAAACA +>30977 +CGACTGTAATTATTAGCACAATACT +>30985 +GACGATATTGCTGCAATAGACCTTGA +>31000 +TCAGATGAGAGACAAATTAGAAT +>31256 +TAATTCGGAATGCCTGCTCTACT +>31417 +TCAATGATCGCTGTGCTCAGTAGGA +>31506 +TTTGTCTGACGTTAAAAAATATA +>31567 +TCCACATTAGGAGGATTATTAGACAAC +>31790 +AAACATAATAATTGATGGCGGAAGA +>31872 +AAGGTAATCATAGAGCACCACGGTT +>32157 +TTTCTGTGAATTCACATGCTGATGA +>32192 +TTTCTCATGTATAAAATGCTCTGATGG +>32223 +TATCTTGTTATTCTAGTGTCTTTGGTT +>32338 +TGTGGGACTCGAGCCAAAATGGCAACCT +>32497 +TGCGGTTGGACAATTTTTTTTTTATA +>32506 +TTGTTGTTTGGAGGAAGTTCCTTT +>32510 +TCTTCCGCCATCAATTATTATGTTTT +>32522 +TACTTGACTTTTCTATAGAATCTGGT +>32540 +TATCGTCAACATCCTCGAACGATCGAGA +>32626 +TTGATCAAGGTAGGGTTGTCGC +>32646 +TCTGTTAAACACCCCTGAATCGTGGAT +>32657 +TTTGGACATTTTGCAGGTGATACAAT +>32682 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>32716 +TATCTATAGTTCCGATTGGCCATCTC +>32885 +GAAAGTGGGTATCTGTATTTTAGGC +>32967 +TAAAGATACCATCTAACCTCCTTGGA +>33075 +ACAGATATTAGTGTTTTTCAAGCAGC +>33147 +ATCGCAATTATGGCATAACAGATTCGGA +>33163 +TTCAGCGTCGCTTGATTGAATAGAT +>33231 +TAAGAACTTCTGAGGTGAAGGGC +>33242 +TTGGGTTTAGAAATTAAAATTAAGGC +>33244 +TATAGAAAGTTACGAATATATTAGGA +>33248 +TTTTTGATCAATTGGCACCGTGCGAA +>33296 +TGCAGAGCTTCTTCCAACGTTGGCAAG +>33318 +TAGATGTCTGCAGGAATAACGGA +>33338 +TCGACTATTAATGGCTGTTAGAATT +>33475 +CAAACTTATCGACCATCTCCTCAAACG +>33601 +TTACCCTTTTTCCGGAGCGTTTGTGC +>33611 +AATGATCGCTGTGCTCAGTATGACGG +>33662 +TTTGATTCATTACAATTTACGCTGAA +>33737 +CAGATATTAGTGTTTTTCAAGCAGC +>33747 +TACATGTAAAGCAGCTGTGTGTGC +>33815 +AGTTTTTGGAATCACTTGA +>34032 +TGCGGACGTGTGCTCGCTGCGTGA +>34200 +TAATATATGTATAATCTGCTTGGTG +>34240 +TCTTTACGCCATATAAATCATTTCGA +>34259 +ACAAATCATAAATTTGATGGGACGA +>34268 +TCTTGTGGACTTCACTCTAG +>34297 +TAAGTAAATAGTCCCCGCCTTATTGAGG +>34337 +TGGATAATGAATGTTTTTGATTTGC +>34569 +TCTAGGTTCTTCTGAAATCGTGGGA +>34643 +TCGTGTAGACCGGATAAGATTTTTT +>34697 +TCTAATACTGTGAAAGGGTGGGG +>34808 +GTAGGATGTGCTCTGCGGTTTCCAC +>34892 +TCTGTCGCAGTTGTAGCTTGCAATA +>34896 +TGTAGTTGCCACTTATGCTGTCCA +>35151 +TAGCAATGTCCGTCTGTCCGTATGA +>35196 +TGTAAATGGTCAGCGAAAGCAAAGG +>35211 +CAGATATTAGTGTTTTTCAAGCAGCGG +>35246 +CCTCAGAGAACGTCAGACCGCG +>35272 +TGCAGAGCTTCTTCCAACGTTGG +>35273 +TCTGGTAGTAAGAAAAATGTAGCTT +>35286 +TATGTATGGATATGTATATTTATGGGT +>35317 +TTTTGGTTTGATCGTCAGGTGGTC +>35512 +GACGATAATAGTGAATTTTGGACA +>35566 +TTCGAATTCGCGCCATTTCACAATC +>35948 +TGGCCTGTATACGCTTTCTGTTG +>36009 +TTGCAAAAGTCATATCTTGAGG +>36059 +TAAGTTACTATGGATCCATAAGGGTA +>36100 +TGATATGGGACTTGTAGCTTTTTTAAA +>36121 +TGTCGTGGGCTGTGCGTTTGAGAA +>36235 +TGCTACCTTTAGCTGCAAGATTAACT +>36238 +CAGATATTAGTGTTTTTCAAGCAGCGG +>36523 +TGCGAGTCCGAGCTTTGAACGTGGG +>36607 +TCTCAATGTAATGTCTTCTTTTTGGA +>36624 +CGTTCTTTAAAACCACCAATGGGA +>36776 +TACTCACGACCATTGTCATTTCTCAAG +>36825 +CGTTCGAGGTCCACTTTCTTAGCGGA +>36850 +TCTGATGTCGGCGAGAAAGGAGTCTCA +>36872 +TCAAGGCCAGCACACAGTAACATGGT +>36968 +ACATTGGAAATACCGCGGGACCGC +>36994 +TATACGTTTTTTGACCTCTTCTCTTTGA +>37110 +TGATAATGTAAACAAAGATAAAGGG +>37210 +TCAGATGAAAGACAAATTAGAATTAAA +>37224 +GACGATAATAGTGAATTTTGGACA +>37226 +TCTGAAGTTGCCGCACTAGAGATGG +>37339 +TATACAGTCCACTATATCGTTGTTTAA +>37395 +TACAATTTACGCTGAATTTAAATGAA +>37460 +CACAAAGTAACGTGCACCACCATTT +>37766 +TCTGTGCAACTTTGTACGCGAAGTCAGC +>37836 +TCAAGGATTAATGTAGGGGGGGGG +>38096 +TTTGGAGGTAGAAGTCTTAGTGGCCGC +>38100 +GACAAAATATGCCCTTCAATTTAGA +>38113 +TATGGTCCAGAATGTAGCCTCGGC +>38213 +TAACTGTTTTTAGTTCAAAGTCTCGAA +>38224 +TTTCAGTTTTTATTGTTAGTCACAGG +>38263 +TAACTGCGAAGTCGATCAGGTCCGA +>38266 +ATCCGGACGATTGACGAGGAGCCCATT +>38271 +TTATGTCAGTGTCGAAGGCGATCGAA +>38373 +TCTATAGCCTTGGCGTAGGAACTCGCA +>38384 +GCTACAACTGCGACAGAAAATTCGGA +>38468 +TATTATCGTCCGCTGCTAAACTGC +>38491 +TGCATACTTCCGTTCTCTTTTCGGGA +>38673 +TTGTGTATTGTGATTCTGATTCGTG +>38745 +TCCCTGGCCCGCTAGACAGCAGGA +>38784 +TGTGCTAATAATTAAAGTCGACTGA +>38873 +TGTCATACTTTCGTCAAAA +>39154 +TAATAGTCAGGGCGCGAATTTTTAAAA +>39325 +TTAGGACTTATTGAACTTTACGGTA +>39359 +TATGCAAATCAAGTGTGACCGTAGCT +>39450 +TTAAACAACGATATAGTGGACAGTA +>39453 +TCTGCCGGATTTTGATCCAATCAAGG +>39819 +GTGGAATTGAAAAAGAACCAGACACA +>40040 +TTTTTGGAACTACCTGAGTCGGTT +>40183 +TAACACAAAGCAGTATGATTTAATAAT +>40189 +CAGCAAGCTGAGATGTACATTAGTATA +>40374 +TACGTTTTCTTGCAGATCAAAAA +>40445 +CGTTCTTTAAAACCACCAATGGGA +>40467 +TACGCAGATTCCTGGGAGTTACAGGA +>40488 +TGATTTGGGCTTGCATACTTGTACT +>40797 +TATATTCGTGTTCATGTGTGAACAGC +>40831 +TCTAAGAACTTCTGAGGTGAAGGGC +>40951 +TTCTTCGTAAGTCAAAATAGTGTCGCC +>40988 +TATGATTGATTGCTTGAGAGT +>40998 +TGATAGAGCTGCATTTGAATTAACGG +>41061 +CTCTTTCCGCTCACTCCCGCTGAGA +>41076 +TGCATTAAGAAGATTTAGGATCC +>41140 +TCAAGGATTAATGTAGGGGGGG +>41143 +CAATAGCGTCGCTGAGTAACAGTG +>41162 +TACGGATTGCAGCGGCTAG +>41183 +TTTTTTGGCACACGATTTTTTGGACGT +>41227 +TAATATATGTATACTCTGCTTGGTG +>41265 +TTTATGATTTTTGGTAATA +>41365 +TAGGAGGGTTCCACAACTATTTCGGGG +>41837 +TAGTTTGACACTGTTTGGAGACGTGG +>42053 +TCTGTGGTCGAATCGAAGGAGTGC +>42113 +TAATAGATCGCTCACCTGTTCCTGG +>42392 +CGGCATCGGAAAACTCCCAGCGGGGC +>42504 +TAACGTTATTATTATTTGAAAATAGAA +>42566 +TTTGTTGGGTAGGAACTTTACTGC +>42668 +TGCTGCAGAGCTTCTTCCAACGTTGG +>42751 +CCAAAGTCTGGTTGTCAGAAAATGTGC +>42777 +AAGGAAGGAACCAAAGAAGCACAAACG +>42804 +TAATTCTAATTTGTCTCTCATCTGA +>42830 +TAGTATACTTATTAAGTCATTTGA +>42857 +AGAGTATTCATCTTGAGGCGTGTG +>42886 +TGAACAACGATTTATGTATATAAGAA +>42899 +TACGATAATAGTGAATTTTGGACA +>42936 +TTTGACTAAAAGTCGCTTGTTTTGGA +>43050 +AATACATAACTCTGGACACAGGAGA +>43219 +TACTTTCGTCAAAATGTTCAGGAGCT +>43242 +TGACATGTCTATTTCCATGGGTTCGGA +>43268 +GTGTATAAAAAAATTTATTGTTGAGCA +>43299 +TCTGGATGATGGCTGATGCTCGTTG +>43491 +TCTGATGACAATGAATTTTTTAGACA +>43520 +TATTGTTGTAATTGCTGCCTCGGTTG +>43654 +ATATGAACAAAGCAAAGACACTAGAA +>43677 +CACGATTCATAACCCTCAGCTGAAG +>43776 +GCTCCTGGCAACTCTGTGATGGAC +>43903 +TATTTTGATGTTTAATGAA +>43990 +TTTCTACTTCGTATTATTTTTATGA +>44062 +AAACATAATAATTGATGGCGGAAGA +>44071 +CACAGACGCAGTGGAAACCGCAGA +>44078 +CGTGCGTCCGAGCAAAAGGTGGT +>44187 +TTATGTAAGAATATTTGTCATTAGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unmatched_5.fa Mon Apr 11 00:27:41 2022 +0000 @@ -0,0 +1,412 @@ +>30787 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>30836 +TAAATATTTTTTTTGAAACA +>30977 +CGACTGTAATTATTAGCACAATACT +>30985 +GACGATATTGCTGCAATAGACCTTGA +>31000 +TCAGATGAGAGACAAATTAGAAT +>31256 +TAATTCGGAATGCCTGCTCTACT +>31417 +TCAATGATCGCTGTGCTCAGTAGGA +>31506 +TTTGTCTGACGTTAAAAAATATA +>31567 +TCCACATTAGGAGGATTATTAGACAAC +>31790 +AAACATAATAATTGATGGCGGAAGA +>31872 +AAGGTAATCATAGAGCACCACGGTT +>32157 +TTTCTGTGAATTCACATGCTGATGA +>32192 +TTTCTCATGTATAAAATGCTCTGATGG +>32223 +TATCTTGTTATTCTAGTGTCTTTGGTT +>32338 +TGTGGGACTCGAGCCAAAATGGCAACCT +>32497 +TGCGGTTGGACAATTTTTTTTTTATA +>32506 +TTGTTGTTTGGAGGAAGTTCCTTT +>32510 +TCTTCCGCCATCAATTATTATGTTTT +>32522 +TACTTGACTTTTCTATAGAATCTGGT +>32540 +TATCGTCAACATCCTCGAACGATCGAGA +>32626 +TTGATCAAGGTAGGGTTGTCGC +>32646 +TCTGTTAAACACCCCTGAATCGTGGAT +>32657 +TTTGGACATTTTGCAGGTGATACAAT +>32682 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>32716 +TATCTATAGTTCCGATTGGCCATCTC +>32885 +GAAAGTGGGTATCTGTATTTTAGGC +>32967 +TAAAGATACCATCTAACCTCCTTGGA +>33075 +ACAGATATTAGTGTTTTTCAAGCAGC +>33147 +ATCGCAATTATGGCATAACAGATTCGGA +>33163 +TTCAGCGTCGCTTGATTGAATAGAT +>33231 +TAAGAACTTCTGAGGTGAAGGGC +>33242 +TTGGGTTTAGAAATTAAAATTAAGGC +>33244 +TATAGAAAGTTACGAATATATTAGGA +>33248 +TTTTTGATCAATTGGCACCGTGCGAA +>33296 +TGCAGAGCTTCTTCCAACGTTGGCAAG +>33318 +TAGATGTCTGCAGGAATAACGGA +>33338 +TCGACTATTAATGGCTGTTAGAATT +>33475 +CAAACTTATCGACCATCTCCTCAAACG +>33601 +TTACCCTTTTTCCGGAGCGTTTGTGC +>33611 +AATGATCGCTGTGCTCAGTATGACGG +>33662 +TTTGATTCATTACAATTTACGCTGAA +>33737 +CAGATATTAGTGTTTTTCAAGCAGC +>33747 +TACATGTAAAGCAGCTGTGTGTGC +>33815 +AGTTTTTGGAATCACTTGA +>34032 +TGCGGACGTGTGCTCGCTGCGTGA +>34200 +TAATATATGTATAATCTGCTTGGTG +>34240 +TCTTTACGCCATATAAATCATTTCGA +>34259 +ACAAATCATAAATTTGATGGGACGA +>34268 +TCTTGTGGACTTCACTCTAG +>34297 +TAAGTAAATAGTCCCCGCCTTATTGAGG +>34337 +TGGATAATGAATGTTTTTGATTTGC +>34569 +TCTAGGTTCTTCTGAAATCGTGGGA +>34643 +TCGTGTAGACCGGATAAGATTTTTT +>34697 +TCTAATACTGTGAAAGGGTGGGG +>34808 +GTAGGATGTGCTCTGCGGTTTCCAC +>34892 +TCTGTCGCAGTTGTAGCTTGCAATA +>34896 +TGTAGTTGCCACTTATGCTGTCCA +>35151 +TAGCAATGTCCGTCTGTCCGTATGA +>35196 +TGTAAATGGTCAGCGAAAGCAAAGG +>35211 +CAGATATTAGTGTTTTTCAAGCAGCGG +>35246 +CCTCAGAGAACGTCAGACCGCG +>35272 +TGCAGAGCTTCTTCCAACGTTGG +>35273 +TCTGGTAGTAAGAAAAATGTAGCTT +>35286 +TATGTATGGATATGTATATTTATGGGT +>35317 +TTTTGGTTTGATCGTCAGGTGGTC +>35512 +GACGATAATAGTGAATTTTGGACA +>35566 +TTCGAATTCGCGCCATTTCACAATC +>35948 +TGGCCTGTATACGCTTTCTGTTG +>36009 +TTGCAAAAGTCATATCTTGAGG +>36059 +TAAGTTACTATGGATCCATAAGGGTA +>36100 +TGATATGGGACTTGTAGCTTTTTTAAA +>36121 +TGTCGTGGGCTGTGCGTTTGAGAA +>36235 +TGCTACCTTTAGCTGCAAGATTAACT +>36238 +CAGATATTAGTGTTTTTCAAGCAGCGG +>36523 +TGCGAGTCCGAGCTTTGAACGTGGG +>36607 +TCTCAATGTAATGTCTTCTTTTTGGA +>36624 +CGTTCTTTAAAACCACCAATGGGA +>36776 +TACTCACGACCATTGTCATTTCTCAAG +>36825 +CGTTCGAGGTCCACTTTCTTAGCGGA +>36850 +TCTGATGTCGGCGAGAAAGGAGTCTCA +>36872 +TCAAGGCCAGCACACAGTAACATGGT +>36968 +ACATTGGAAATACCGCGGGACCGC +>36994 +TATACGTTTTTTGACCTCTTCTCTTTGA +>37110 +TGATAATGTAAACAAAGATAAAGGG +>37210 +TCAGATGAAAGACAAATTAGAATTAAA +>37224 +GACGATAATAGTGAATTTTGGACA +>37226 +TCTGAAGTTGCCGCACTAGAGATGG +>37339 +TATACAGTCCACTATATCGTTGTTTAA +>37395 +TACAATTTACGCTGAATTTAAATGAA +>37460 +CACAAAGTAACGTGCACCACCATTT +>37766 +TCTGTGCAACTTTGTACGCGAAGTCAGC +>37836 +TCAAGGATTAATGTAGGGGGGGGG +>38096 +TTTGGAGGTAGAAGTCTTAGTGGCCGC +>38100 +GACAAAATATGCCCTTCAATTTAGA +>38113 +TATGGTCCAGAATGTAGCCTCGGC +>38213 +TAACTGTTTTTAGTTCAAAGTCTCGAA +>38224 +TTTCAGTTTTTATTGTTAGTCACAGG +>38263 +TAACTGCGAAGTCGATCAGGTCCGA +>38266 +ATCCGGACGATTGACGAGGAGCCCATT +>38271 +TTATGTCAGTGTCGAAGGCGATCGAA +>38373 +TCTATAGCCTTGGCGTAGGAACTCGCA +>38384 +GCTACAACTGCGACAGAAAATTCGGA +>38468 +TATTATCGTCCGCTGCTAAACTGC +>38491 +TGCATACTTCCGTTCTCTTTTCGGGA +>38673 +TTGTGTATTGTGATTCTGATTCGTG +>38745 +TCCCTGGCCCGCTAGACAGCAGGA +>38784 +TGTGCTAATAATTAAAGTCGACTGA +>38873 +TGTCATACTTTCGTCAAAA +>39154 +TAATAGTCAGGGCGCGAATTTTTAAAA +>39325 +TTAGGACTTATTGAACTTTACGGTA +>39359 +TATGCAAATCAAGTGTGACCGTAGCT +>39450 +TTAAACAACGATATAGTGGACAGTA +>39453 +TCTGCCGGATTTTGATCCAATCAAGG +>39819 +GTGGAATTGAAAAAGAACCAGACACA +>40040 +TTTTTGGAACTACCTGAGTCGGTT +>40183 +TAACACAAAGCAGTATGATTTAATAAT +>40189 +CAGCAAGCTGAGATGTACATTAGTATA +>40374 +TACGTTTTCTTGCAGATCAAAAA +>40445 +CGTTCTTTAAAACCACCAATGGGA +>40467 +TACGCAGATTCCTGGGAGTTACAGGA +>40488 +TGATTTGGGCTTGCATACTTGTACT +>40797 +TATATTCGTGTTCATGTGTGAACAGC +>40831 +TCTAAGAACTTCTGAGGTGAAGGGC +>40951 +TTCTTCGTAAGTCAAAATAGTGTCGCC +>40988 +TATGATTGATTGCTTGAGAGT +>42668 +TGCTGCAGAGCTTCTTCCAACGTTGG +>42751 +CCAAAGTCTGGTTGTCAGAAAATGTGC +>42777 +AAGGAAGGAACCAAAGAAGCACAAACG +>42804 +TAATTCTAATTTGTCTCTCATCTGA +>42830 +TAGTATACTTATTAAGTCATTTGA +>42857 +AGAGTATTCATCTTGAGGCGTGTG +>42886 +TGAACAACGATTTATGTATATAAGAA +>42899 +TACGATAATAGTGAATTTTGGACA +>42936 +TTTGACTAAAAGTCGCTTGTTTTGGA +>43050 +AATACATAACTCTGGACACAGGAGA +>43219 +TACTTTCGTCAAAATGTTCAGGAGCT +>43242 +TGACATGTCTATTTCCATGGGTTCGGA +>43268 +GTGTATAAAAAAATTTATTGTTGAGCA +>43299 +TCTGGATGATGGCTGATGCTCGTTG +>43491 +TCTGATGACAATGAATTTTTTAGACA +>43520 +TATTGTTGTAATTGCTGCCTCGGTTG +>43654 +ATATGAACAAAGCAAAGACACTAGAA +>43677 +CACGATTCATAACCCTCAGCTGAAG +>43776 +GCTCCTGGCAACTCTGTGATGGAC +>43903 +TATTTTGATGTTTAATGAA +>43990 +TTTCTACTTCGTATTATTTTTATGA +>44062 +AAACATAATAATTGATGGCGGAAGA +>44071 +CACAGACGCAGTGGAAACCGCAGA +>44078 +CGTGCGTCCGAGCAAAAGGTGGT +>44187 +TTATGTAAGAATATTTGTCATTAGA +>44239 +TATCAATGTTGACCGTAATACTCAA +>44253 +TGGATATTGAATGTTTTTGATTTGC +>44279 +TGTGATTTTCCCAATTTATATTAATACA +>44374 +TAGTCGGAGTTGATGAGCTGCC +>44412 +TCCGAATTTTCTGTCGCAGTTGTAGCT +>44423 +TCGGCTTGGGTTTAGAAATTAAAAT +>44558 +ACAAATCATAAATTTGGTGGGA +>44637 +TTTTTTATCAATTGGCACCATGCGAA +>44642 +TACTGTGTGCTGGCCTTGATGAAAGT +>44688 +TAGATGTCTGCACGAATAACGGA +>44945 +TGCTTCCGAGCAATCTACGTTGGTAAAA +>45054 +TCTCATCTGACAATTTTTTAAAAGCGA +>45088 +TCTGAAGCAGCGCTCACGGCAGAATGC +>45203 +TGCCGGATTTTGATCCAATCAAGGGA +>45263 +TATTGATTTTCCTATTTAGTTGAACA +>45274 +AAAAGTCTGGATATTGTAGGATAGGA +>45358 +CAACGCTGGACCTTGGACTCGAGGGC +>45396 +CATCTTGTTATTCTAGTGTCTTTGGTA +>45641 +GAACTAAGATCAGTCGACTGTAATTAT +>45774 +CGACGAACTAGCAGCTCTGGTGT +>45911 +CAGATATTAGTGTTTTTCAAGCAGC +>45997 +TTAACCAGTCGGCGTTGTTTAAGTAGC +>46031 +AAAGCGTCTACTTGAACAATGAGA +>46054 +TTAGATCGTATTACTTGGGTGCTGG +>46199 +TAAGCGTTAGGTCGTATTACTTGGGC +>46255 +TAAGATTGAAAATTACTGTGGAGTAAT +>46366 +TAAGGAATTGTCGGCCATTTAATGTGA +>46387 +CAGAGCTTCTTCCAACGTTGGCAAG +>46701 +TTGTATCTTTTTGCTTTTTATATT +>46705 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>46783 +TCTCATCTGACAATTTTTTAAAAGCGA +>46836 +TCATAAGGACAGACGGACAT +>46892 +AAACATAATAATTGATGGCGGAAGA +>47244 +TCTCTGTCCGCTCGCTTACGATGAGA +>47290 +TTTCAGGAATGGGGTCGTCCCACTA +>47348 +TGCAAAAGTAATATCAAAGACACTA +>47359 +TTTAAAATTATAATAGTCAGGG +>47427 +CATCTTGTTATTCTAGTGTCTTTGGT +>47654 +TTAAACACTGAATTCGGTTTCGAAA +>47656 +TTTCTGAAGAATCCTGTAACTCCC +>47785 +TGTAGATGAGCGGCAAATGTGG +>47827 +TTGATCAAGGTAGAGTTGTCGCGC +>48184 +TTCAAGGATTAATGTAGGGGGGG +>48576 +TTAACCCGGAGACTTGGGTGTGGGT +>48764 +TGCAAAAGTAATATCAAAGACAATAGA +>48782 +TGACAATGTAGTGAACGCCAGTGT +>48893 +TATAAATGCCGTCTGATATTATTAAA +>49017 +ACCGGATGTCATATCCAGCGTCGTGAA +>49381 +TACAATGTAAATTCGTTTCTTCGATCA +>49456 +TTTTGGTTTGATCGTCAGGTGGACGC +>49484 +TCTGCTATCATTGACTCGATCATTGA +>49569 +TCGATTGTATGATCAGTGGAAGTGGC +>49602 +GAAATTGGCCAACATTAATTCGGAA +>49628 +TACTTTCGTCAAAATGTTCAGGAGC +>49715 +TTTGTCCGGGTGCTTCGAAAGAACTCT +>49778 +CAATAGCGTCGCTGAGTAACAGTG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unmatched_fastq.fa Mon Apr 11 00:27:41 2022 +0000 @@ -0,0 +1,78 @@ +>HWI-176 +CGAGACTTTGAACTAAAAACAGT +>HWI-277 +TCAGATGAGAGACAAATTAGAAT +>HWI-458 +TTTCGAGGTTCCGAATTTTCTGTC +>HWI-778 +TAGGATGTGCTCTGCGGTTTCCACT +>HWI-895 +TTTAATTGGCGCAGTCGGTAGGATC +>HWI-967 +GAGAATGACGGAACTGTAATA +>HWI-974 +TTGCGAGGTCGTCCTGGGAGACCAG +>HWI-1009 +AACGTTGGAAGAAGCTCTGCAGC +>HWI-1011 +TTTAATGTTAACACGGACATTGACC +>HWI-1014 +TCTCCGACTTGCTGAGCTGTTTCCGCCG +>HWI-1106 +CATCTTGTTATTCTATTGTCTTTGGTC +>HWI-1202 +AAAGTAAATTCTGATGACTTCAAAAT +>HWI-1227 +TACCATGTAAATTCGTTTCTTCG +>HWI-1262 +TATTAGCTCAAAGAACAGCTCGT +>HWI-1297 +TTGCAGCAATATCGTCAACATCCTC +>HWI-1520 +TAATATCGAAGCCGAACTGAGAACA +>HWI-1584 +TAAAGTTATGACAAGAATTGATGTT +>HWI-1590 +TCATATGCCAATTTCGTGTTTCGATG +>HWI-1609 +TAAGATTGAAAATTACTGTGGAGTAAT +>HWI-1745 +TTCATCCTGCTGCCGGAGC +>HWI-1816 +CTTCCGTTATTCCTGCAGACA +>HWI-1845 +TAATATCGAACCCGAACTGAGAA +>HWI-1852 +TGGACACCGTCGTTCCACTTGAACT +>HWI-1887 +TCAAGGTAGGGTTGTCGCGTGTATTT +>HWI-1892 +TATTATCGTCCGCTGCTAAACTG +>HWI-1936 +TAGCTGCAACTTCTGAAGTTATGGC +>HWI-1997 +CAAAACAAGAATTTTTCGCATGGTGCC +>HWI-2055 +TAAACTAATTCTGTCGGTTTTCTGT +>HWI-2075 +AGACTTTGAACTAAAAACAGTTACCT +>HWI-2244 +TAACTTCTTCTATTTTCGTGCGGGA +>HWI-2253 +TAACTGCCCTCGATCACGCCTTCCCGA +>HWI-2277 +TGACGACGACAGTATCGCAAGACGGT +>HWI-2409 +GTCAGATGAGAGACAAATTAGAAT +>HWI-2418 +TAATATCGAACCCGAACTGAGAAC +>HWI-2490 +TTTATGACAACCCGATCGGACCTCACTC +>HWI-2557 +TGTCCGGAGATCAAGAAGTGTTGGCAC +>HWI-2670 +TCAGATGAGAGACAAATTAGAATTAAA +>HWI-2719 +TATTATCGTCCGCTGCTAAACTGCTG +>HWI-2834 +CCTAGGAGAAGGAAGTCAAGAAGGCC