Mercurial > repos > artbio > sr_bowtie_dataset_annotation
changeset 8:3519c2de7fac draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit e62066428e669516e024d081933ee8c7f953ba1b"
author | artbio |
---|---|
date | Sat, 09 Apr 2022 22:45:21 +0000 |
parents | 3bddd7ab96e3 |
children | 6bf9de09aa74 |
files | sr_bowtie_dataset_annotation.xml test-data/unmatched_1.fa test-data/unmatched_2.fa test-data/unmatched_3.fa |
diffstat | 4 files changed, 1157 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/sr_bowtie_dataset_annotation.xml Sun Oct 24 23:52:11 2021 +0000 +++ b/sr_bowtie_dataset_annotation.xml Sat Apr 09 22:45:21 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.5.0"> +<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.6"> <description>by iterative alignments with sRbowtie</description> <requirements> <requirement type="package" version="1.3.1">bowtie</requirement> @@ -59,9 +59,11 @@ remaining=\$(( \$(wc -l < class_unmatched.fa)/2)) && echo -e "$sample\tNot classified\t\${remaining}\t\${genome_aligned}" >> $output && #end for - - Rscript $__tool_directory__/barplot.r --input $output --barplot $barplot + #if $format == '-q': + && mv class_unmatched.fa class_unmatched.fastq + && sed -n '1~4s/^@/>/p;2~4p' class_unmatched.fastq > class_unmatched.fa + #end if ]]></command> <inputs> <param name="input" type="data" multiple="True" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/> @@ -94,6 +96,7 @@ <!-- End of other bowtie index selections --> </inputs> <outputs> + <data format="fasta" name="unmatched" label="Annotate smRNAs: Unmatched reads" from_work_dir="class_unmatched.fa" /> <data format="tabular" name="output" label="Cascade Annotation Analysis"> <actions> <action name="column_names" type="metadata" default="Sample,Reference Index,Number of reads, Total reads" /> @@ -110,6 +113,7 @@ <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" /> <output name="output" ftype="tabular" file="sample1_output.tab" /> <output name="barplot" ftype="pdf" file="sample1_output.pdf" compare="sim_size" delta="500"/> + <output name="unmatched" ftype="fasta" file="unmatched_1.fa" /> </test> <test> <param name="input" value ="sample.fastq" ftype="fastq" /> @@ -119,6 +123,7 @@ <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" /> <output name="output" ftype="tabular" file="sample_output.tab" /> <output name="barplot" ftype="pdf" file="sample_output.pdf" compare="sim_size" delta="500"/> + <output name="unmatched" ftype="fasta" file="unmatched_2.fa" /> </test> <test> <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" /> @@ -128,6 +133,7 @@ <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" /> <output name="output" ftype="tabular" file="multisample5_output.tab" /> <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" /> + <output name="unmatched" ftype="fasta" file="unmatched_3.fa" /> </test> </tests> <help> @@ -172,7 +178,16 @@ **OUTPUTS** -**Annotation table in a tabular format** +**- Annotation table in a tabular format** + +**- Pie Charts of class abundances** + +**- Unmatched reads in fasta format** </help> + + <citations> + <citation type="doi">10.1038/nature11416</citation> + </citations> + </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unmatched_1.fa Sat Apr 09 22:45:21 2022 +0000 @@ -0,0 +1,530 @@ +>25207 +TCGCACTAGCTTCGGTCGTCTCGGA +>25229 +TCTGTTCGAGGTTCTTTATTTGAA +>25283 +TAAGGGAGCGAGATCGTTCGATGCA +>25363 +TAGTGTTGGGTGTAGGCCAGGGAG +>25376 +TCCGCCGACAATGTTGCAACATGA +>25435 +TTTCTTCAAGCTGCGCGTTTTTCGG +>25593 +TCGAAAACTTCTTCGCACTCTTCGTT +>25660 +TCTCGCTGCGTGAACGATGAAGGC +>25968 +TTTGCTAGGACTGCTGCATAAG +>26220 +GCCCGTGTCGATGAATGCTTCAAACT +>26261 +TATTATCGTCCGCTGCTAAACTGC +>26543 +CTTATGTATAAAAAGCTCTGATGGA +>26610 +TCCTTAAGTTCGTTGATTTGGGCT +>26675 +CAATATCGTCAACATCCTCGAACGAT +>26873 +TTGGGAGCTGAATCCCGTTACGGTA +>26978 +CGCGCATTAGCTCAAATCTAGTTGGA +>27220 +TGTTTATTTGTCAAGTTTAGATAATA +>27397 +TAGTTCACAGCTGTATGTCCAGATGGGT +>27869 +TATGGTCCAGAATGTAGCCTCGGC +>27878 +TATTAATCGGGCCACAAATATCGGTA +>28035 +GGTATTCTTTGCGAGGTCGTCCTGG +>28154 +AAAGACGAGAACGCGTATATGTGTGC +>28319 +TGACCAACCTTAAAAGATCGGGGT +>28387 +TATTACTATTTCTAAGCTTTGTTTAAA +>28594 +TTTTGAGGTTGGTCAAGAAGTTGTT +>28608 +TAAGATTGAAAATTACTGTGGAGT +>28668 +TACAGATTCTAGAGACAAAGACGC +>28674 +TAAGTACATGCGCCCAGCCGCCGTGA +>28836 +TCTGGTTAAGGTCGGAATACTCGTCT +>29056 +TGCTTTACATACCCTTTGGTGCCC +>29323 +TTTTGCTGCAGAGCTTCTTCCAACGT +>29342 +TATGTATGGATATGTATATTTATGGT +>29387 +TGATAATCGACCTCTTCCATCGTTGT +>29423 +TCTAAGAACTTCTGAGGTGAAGG +>29462 +TGCATTTCAATCGGAAGAGTACTCTG +>29492 +TAAAACACAAATCTCGACATACAGA +>29703 +GTACAGGTTCTGATGACAATG +>29785 +TCAGATGAAAGACAAATTAGAATT +>30053 +TAAGAACTTCTGAGATGAAGGGC +>30096 +TGGATATTGAATGTTTTTGATTTGC +>30136 +TGTAGTCGTCGTATGTCCGGA +>30272 +TGCATTCGTGGATTCGCATTCGAGA +>30497 +TGACAAAATATGCCCTTCACCTCAGA +>30787 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>30836 +TAAATATTTTTTTTGAAACA +>30977 +CGACTGTAATTATTAGCACAATACT +>30985 +GACGATATTGCTGCAATAGACCTTGA +>31000 +TCAGATGAGAGACAAATTAGAAT +>31256 +TAATTCGGAATGCCTGCTCTACT +>31417 +TCAATGATCGCTGTGCTCAGTAGGA +>31506 +TTTGTCTGACGTTAAAAAATATA +>31567 +TCCACATTAGGAGGATTATTAGACAAC +>31790 +AAACATAATAATTGATGGCGGAAGA +>31872 +AAGGTAATCATAGAGCACCACGGTT +>32157 +TTTCTGTGAATTCACATGCTGATGA +>32192 +TTTCTCATGTATAAAATGCTCTGATGG +>32223 +TATCTTGTTATTCTAGTGTCTTTGGTT +>32338 +TGTGGGACTCGAGCCAAAATGGCAACCT +>32497 +TGCGGTTGGACAATTTTTTTTTTATA +>32506 +TTGTTGTTTGGAGGAAGTTCCTTT +>32510 +TCTTCCGCCATCAATTATTATGTTTT +>32522 +TACTTGACTTTTCTATAGAATCTGGT +>32540 +TATCGTCAACATCCTCGAACGATCGAGA +>32626 +TTGATCAAGGTAGGGTTGTCGC +>32646 +TCTGTTAAACACCCCTGAATCGTGGAT +>32657 +TTTGGACATTTTGCAGGTGATACAAT +>32682 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>32716 +TATCTATAGTTCCGATTGGCCATCTC +>32885 +GAAAGTGGGTATCTGTATTTTAGGC +>32967 +TAAAGATACCATCTAACCTCCTTGGA +>33075 +ACAGATATTAGTGTTTTTCAAGCAGC +>33147 +ATCGCAATTATGGCATAACAGATTCGGA +>33163 +TTCAGCGTCGCTTGATTGAATAGAT +>33231 +TAAGAACTTCTGAGGTGAAGGGC +>33242 +TTGGGTTTAGAAATTAAAATTAAGGC +>33244 +TATAGAAAGTTACGAATATATTAGGA +>33248 +TTTTTGATCAATTGGCACCGTGCGAA +>33296 +TGCAGAGCTTCTTCCAACGTTGGCAAG +>33318 +TAGATGTCTGCAGGAATAACGGA +>33338 +TCGACTATTAATGGCTGTTAGAATT +>33475 +CAAACTTATCGACCATCTCCTCAAACG +>33601 +TTACCCTTTTTCCGGAGCGTTTGTGC +>33611 +AATGATCGCTGTGCTCAGTATGACGG +>33662 +TTTGATTCATTACAATTTACGCTGAA +>33737 +CAGATATTAGTGTTTTTCAAGCAGC +>33747 +TACATGTAAAGCAGCTGTGTGTGC +>33815 +AGTTTTTGGAATCACTTGA +>34032 +TGCGGACGTGTGCTCGCTGCGTGA +>34200 +TAATATATGTATAATCTGCTTGGTG +>34240 +TCTTTACGCCATATAAATCATTTCGA +>34259 +ACAAATCATAAATTTGATGGGACGA +>34268 +TCTTGTGGACTTCACTCTAG +>34297 +TAAGTAAATAGTCCCCGCCTTATTGAGG +>34337 +TGGATAATGAATGTTTTTGATTTGC +>34569 +TCTAGGTTCTTCTGAAATCGTGGGA +>34643 +TCGTGTAGACCGGATAAGATTTTTT +>34697 +TCTAATACTGTGAAAGGGTGGGG +>34808 +GTAGGATGTGCTCTGCGGTTTCCAC +>34892 +TCTGTCGCAGTTGTAGCTTGCAATA +>34896 +TGTAGTTGCCACTTATGCTGTCCA +>35151 +TAGCAATGTCCGTCTGTCCGTATGA +>35196 +TGTAAATGGTCAGCGAAAGCAAAGG +>35211 +CAGATATTAGTGTTTTTCAAGCAGCGG +>35246 +CCTCAGAGAACGTCAGACCGCG +>35272 +TGCAGAGCTTCTTCCAACGTTGG +>35273 +TCTGGTAGTAAGAAAAATGTAGCTT +>35286 +TATGTATGGATATGTATATTTATGGGT +>35317 +TTTTGGTTTGATCGTCAGGTGGTC +>35512 +GACGATAATAGTGAATTTTGGACA +>35566 +TTCGAATTCGCGCCATTTCACAATC +>35948 +TGGCCTGTATACGCTTTCTGTTG +>36009 +TTGCAAAAGTCATATCTTGAGG +>36059 +TAAGTTACTATGGATCCATAAGGGTA +>36100 +TGATATGGGACTTGTAGCTTTTTTAAA +>36121 +TGTCGTGGGCTGTGCGTTTGAGAA +>36235 +TGCTACCTTTAGCTGCAAGATTAACT +>36238 +CAGATATTAGTGTTTTTCAAGCAGCGG +>36523 +TGCGAGTCCGAGCTTTGAACGTGGG +>36607 +TCTCAATGTAATGTCTTCTTTTTGGA +>36624 +CGTTCTTTAAAACCACCAATGGGA +>36776 +TACTCACGACCATTGTCATTTCTCAAG +>36825 +CGTTCGAGGTCCACTTTCTTAGCGGA +>36850 +TCTGATGTCGGCGAGAAAGGAGTCTCA +>36872 +TCAAGGCCAGCACACAGTAACATGGT +>36968 +ACATTGGAAATACCGCGGGACCGC +>36994 +TATACGTTTTTTGACCTCTTCTCTTTGA +>37110 +TGATAATGTAAACAAAGATAAAGGG +>37210 +TCAGATGAAAGACAAATTAGAATTAAA +>37224 +GACGATAATAGTGAATTTTGGACA +>37226 +TCTGAAGTTGCCGCACTAGAGATGG +>37339 +TATACAGTCCACTATATCGTTGTTTAA +>37395 +TACAATTTACGCTGAATTTAAATGAA +>37460 +CACAAAGTAACGTGCACCACCATTT +>37766 +TCTGTGCAACTTTGTACGCGAAGTCAGC +>37836 +TCAAGGATTAATGTAGGGGGGGGG +>38096 +TTTGGAGGTAGAAGTCTTAGTGGCCGC +>38100 +GACAAAATATGCCCTTCAATTTAGA +>38113 +TATGGTCCAGAATGTAGCCTCGGC +>38213 +TAACTGTTTTTAGTTCAAAGTCTCGAA +>38224 +TTTCAGTTTTTATTGTTAGTCACAGG +>38263 +TAACTGCGAAGTCGATCAGGTCCGA +>38266 +ATCCGGACGATTGACGAGGAGCCCATT +>38271 +TTATGTCAGTGTCGAAGGCGATCGAA +>38373 +TCTATAGCCTTGGCGTAGGAACTCGCA +>38384 +GCTACAACTGCGACAGAAAATTCGGA +>38468 +TATTATCGTCCGCTGCTAAACTGC +>38491 +TGCATACTTCCGTTCTCTTTTCGGGA +>38673 +TTGTGTATTGTGATTCTGATTCGTG +>38745 +TCCCTGGCCCGCTAGACAGCAGGA +>38784 +TGTGCTAATAATTAAAGTCGACTGA +>38873 +TGTCATACTTTCGTCAAAA +>39154 +TAATAGTCAGGGCGCGAATTTTTAAAA +>39325 +TTAGGACTTATTGAACTTTACGGTA +>39359 +TATGCAAATCAAGTGTGACCGTAGCT +>39450 +TTAAACAACGATATAGTGGACAGTA +>39453 +TCTGCCGGATTTTGATCCAATCAAGG +>39819 +GTGGAATTGAAAAAGAACCAGACACA +>40040 +TTTTTGGAACTACCTGAGTCGGTT +>40183 +TAACACAAAGCAGTATGATTTAATAAT +>40189 +CAGCAAGCTGAGATGTACATTAGTATA +>40374 +TACGTTTTCTTGCAGATCAAAAA +>40445 +CGTTCTTTAAAACCACCAATGGGA +>40467 +TACGCAGATTCCTGGGAGTTACAGGA +>40488 +TGATTTGGGCTTGCATACTTGTACT +>40797 +TATATTCGTGTTCATGTGTGAACAGC +>40831 +TCTAAGAACTTCTGAGGTGAAGGGC +>40951 +TTCTTCGTAAGTCAAAATAGTGTCGCC +>40988 +TATGATTGATTGCTTGAGAGT +>40998 +TGATAGAGCTGCATTTGAATTAACGG +>41061 +CTCTTTCCGCTCACTCCCGCTGAGA +>41076 +TGCATTAAGAAGATTTAGGATCC +>41140 +TCAAGGATTAATGTAGGGGGGG +>41143 +CAATAGCGTCGCTGAGTAACAGTG +>41162 +TACGGATTGCAGCGGCTAG +>41183 +TTTTTTGGCACACGATTTTTTGGACGT +>41227 +TAATATATGTATACTCTGCTTGGTG +>41265 +TTTATGATTTTTGGTAATA +>41365 +TAGGAGGGTTCCACAACTATTTCGGGG +>41837 +TAGTTTGACACTGTTTGGAGACGTGG +>42053 +TCTGTGGTCGAATCGAAGGAGTGC +>42113 +TAATAGATCGCTCACCTGTTCCTGG +>42392 +CGGCATCGGAAAACTCCCAGCGGGGC +>42504 +TAACGTTATTATTATTTGAAAATAGAA +>42566 +TTTGTTGGGTAGGAACTTTACTGC +>42668 +TGCTGCAGAGCTTCTTCCAACGTTGG +>42751 +CCAAAGTCTGGTTGTCAGAAAATGTGC +>42777 +AAGGAAGGAACCAAAGAAGCACAAACG +>42804 +TAATTCTAATTTGTCTCTCATCTGA +>42830 +TAGTATACTTATTAAGTCATTTGA +>42857 +AGAGTATTCATCTTGAGGCGTGTG +>42886 +TGAACAACGATTTATGTATATAAGAA +>42899 +TACGATAATAGTGAATTTTGGACA +>42936 +TTTGACTAAAAGTCGCTTGTTTTGGA +>43050 +AATACATAACTCTGGACACAGGAGA +>43219 +TACTTTCGTCAAAATGTTCAGGAGCT +>43242 +TGACATGTCTATTTCCATGGGTTCGGA +>43268 +GTGTATAAAAAAATTTATTGTTGAGCA +>43299 +TCTGGATGATGGCTGATGCTCGTTG +>43491 +TCTGATGACAATGAATTTTTTAGACA +>43520 +TATTGTTGTAATTGCTGCCTCGGTTG +>43654 +ATATGAACAAAGCAAAGACACTAGAA +>43677 +CACGATTCATAACCCTCAGCTGAAG +>43776 +GCTCCTGGCAACTCTGTGATGGAC +>43903 +TATTTTGATGTTTAATGAA +>43990 +TTTCTACTTCGTATTATTTTTATGA +>44062 +AAACATAATAATTGATGGCGGAAGA +>44071 +CACAGACGCAGTGGAAACCGCAGA +>44078 +CGTGCGTCCGAGCAAAAGGTGGT +>44187 +TTATGTAAGAATATTTGTCATTAGA +>44239 +TATCAATGTTGACCGTAATACTCAA +>44253 +TGGATATTGAATGTTTTTGATTTGC +>44279 +TGTGATTTTCCCAATTTATATTAATACA +>44374 +TAGTCGGAGTTGATGAGCTGCC +>44412 +TCCGAATTTTCTGTCGCAGTTGTAGCT +>44423 +TCGGCTTGGGTTTAGAAATTAAAAT +>44558 +ACAAATCATAAATTTGGTGGGA +>44637 +TTTTTTATCAATTGGCACCATGCGAA +>44642 +TACTGTGTGCTGGCCTTGATGAAAGT +>44688 +TAGATGTCTGCACGAATAACGGA +>44945 +TGCTTCCGAGCAATCTACGTTGGTAAAA +>45054 +TCTCATCTGACAATTTTTTAAAAGCGA +>45088 +TCTGAAGCAGCGCTCACGGCAGAATGC +>45203 +TGCCGGATTTTGATCCAATCAAGGGA +>45263 +TATTGATTTTCCTATTTAGTTGAACA +>45274 +AAAAGTCTGGATATTGTAGGATAGGA +>45358 +CAACGCTGGACCTTGGACTCGAGGGC +>45396 +CATCTTGTTATTCTAGTGTCTTTGGTA +>45641 +GAACTAAGATCAGTCGACTGTAATTAT +>45774 +CGACGAACTAGCAGCTCTGGTGT +>45911 +CAGATATTAGTGTTTTTCAAGCAGC +>45997 +TTAACCAGTCGGCGTTGTTTAAGTAGC +>46031 +AAAGCGTCTACTTGAACAATGAGA +>46054 +TTAGATCGTATTACTTGGGTGCTGG +>46199 +TAAGCGTTAGGTCGTATTACTTGGGC +>46255 +TAAGATTGAAAATTACTGTGGAGTAAT +>46366 +TAAGGAATTGTCGGCCATTTAATGTGA +>46387 +CAGAGCTTCTTCCAACGTTGGCAAG +>46701 +TTGTATCTTTTTGCTTTTTATATT +>46705 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>46783 +TCTCATCTGACAATTTTTTAAAAGCGA +>46836 +TCATAAGGACAGACGGACAT +>46892 +AAACATAATAATTGATGGCGGAAGA +>47244 +TCTCTGTCCGCTCGCTTACGATGAGA +>47290 +TTTCAGGAATGGGGTCGTCCCACTA +>47348 +TGCAAAAGTAATATCAAAGACACTA +>47359 +TTTAAAATTATAATAGTCAGGG +>47427 +CATCTTGTTATTCTAGTGTCTTTGGT +>47654 +TTAAACACTGAATTCGGTTTCGAAA +>47656 +TTTCTGAAGAATCCTGTAACTCCC +>47785 +TGTAGATGAGCGGCAAATGTGG +>47827 +TTGATCAAGGTAGAGTTGTCGCGC +>48184 +TTCAAGGATTAATGTAGGGGGGG +>48576 +TTAACCCGGAGACTTGGGTGTGGGT +>48764 +TGCAAAAGTAATATCAAAGACAATAGA +>48782 +TGACAATGTAGTGAACGCCAGTGT +>48893 +TATAAATGCCGTCTGATATTATTAAA +>49017 +ACCGGATGTCATATCCAGCGTCGTGAA +>49381 +TACAATGTAAATTCGTTTCTTCGATCA +>49456 +TTTTGGTTTGATCGTCAGGTGGACGC +>49484 +TCTGCTATCATTGACTCGATCATTGA +>49569 +TCGATTGTATGATCAGTGGAAGTGGC +>49602 +GAAATTGGCCAACATTAATTCGGAA +>49628 +TACTTTCGTCAAAATGTTCAGGAGC +>49715 +TTTGTCCGGGTGCTTCGAAAGAACTCT +>49778 +CAATAGCGTCGCTGAGTAACAGTG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unmatched_2.fa Sat Apr 09 22:45:21 2022 +0000 @@ -0,0 +1,78 @@ +>HWI-176 +CGAGACTTTGAACTAAAAACAGT +>HWI-277 +TCAGATGAGAGACAAATTAGAAT +>HWI-458 +TTTCGAGGTTCCGAATTTTCTGTC +>HWI-778 +TAGGATGTGCTCTGCGGTTTCCACT +>HWI-895 +TTTAATTGGCGCAGTCGGTAGGATC +>HWI-967 +GAGAATGACGGAACTGTAATA +>HWI-974 +TTGCGAGGTCGTCCTGGGAGACCAG +>HWI-1009 +AACGTTGGAAGAAGCTCTGCAGC +>HWI-1011 +TTTAATGTTAACACGGACATTGACC +>HWI-1014 +TCTCCGACTTGCTGAGCTGTTTCCGCCG +>HWI-1106 +CATCTTGTTATTCTATTGTCTTTGGTC +>HWI-1202 +AAAGTAAATTCTGATGACTTCAAAAT +>HWI-1227 +TACCATGTAAATTCGTTTCTTCG +>HWI-1262 +TATTAGCTCAAAGAACAGCTCGT +>HWI-1297 +TTGCAGCAATATCGTCAACATCCTC +>HWI-1520 +TAATATCGAAGCCGAACTGAGAACA +>HWI-1584 +TAAAGTTATGACAAGAATTGATGTT +>HWI-1590 +TCATATGCCAATTTCGTGTTTCGATG +>HWI-1609 +TAAGATTGAAAATTACTGTGGAGTAAT +>HWI-1745 +TTCATCCTGCTGCCGGAGC +>HWI-1816 +CTTCCGTTATTCCTGCAGACA +>HWI-1845 +TAATATCGAACCCGAACTGAGAA +>HWI-1852 +TGGACACCGTCGTTCCACTTGAACT +>HWI-1887 +TCAAGGTAGGGTTGTCGCGTGTATTT +>HWI-1892 +TATTATCGTCCGCTGCTAAACTG +>HWI-1936 +TAGCTGCAACTTCTGAAGTTATGGC +>HWI-1997 +CAAAACAAGAATTTTTCGCATGGTGCC +>HWI-2055 +TAAACTAATTCTGTCGGTTTTCTGT +>HWI-2075 +AGACTTTGAACTAAAAACAGTTACCT +>HWI-2244 +TAACTTCTTCTATTTTCGTGCGGGA +>HWI-2253 +TAACTGCCCTCGATCACGCCTTCCCGA +>HWI-2277 +TGACGACGACAGTATCGCAAGACGGT +>HWI-2409 +GTCAGATGAGAGACAAATTAGAAT +>HWI-2418 +TAATATCGAACCCGAACTGAGAAC +>HWI-2490 +TTTATGACAACCCGATCGGACCTCACTC +>HWI-2557 +TGTCCGGAGATCAAGAAGTGTTGGCAC +>HWI-2670 +TCAGATGAGAGACAAATTAGAATTAAA +>HWI-2719 +TATTATCGTCCGCTGCTAAACTGCTG +>HWI-2834 +CCTAGGAGAAGGAAGTCAAGAAGGCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unmatched_3.fa Sat Apr 09 22:45:21 2022 +0000 @@ -0,0 +1,530 @@ +>25207 +TCGCACTAGCTTCGGTCGTCTCGGA +>25229 +TCTGTTCGAGGTTCTTTATTTGAA +>25283 +TAAGGGAGCGAGATCGTTCGATGCA +>25363 +TAGTGTTGGGTGTAGGCCAGGGAG +>25376 +TCCGCCGACAATGTTGCAACATGA +>25435 +TTTCTTCAAGCTGCGCGTTTTTCGG +>25593 +TCGAAAACTTCTTCGCACTCTTCGTT +>25660 +TCTCGCTGCGTGAACGATGAAGGC +>25968 +TTTGCTAGGACTGCTGCATAAG +>26220 +GCCCGTGTCGATGAATGCTTCAAACT +>26261 +TATTATCGTCCGCTGCTAAACTGC +>26543 +CTTATGTATAAAAAGCTCTGATGGA +>26610 +TCCTTAAGTTCGTTGATTTGGGCT +>26675 +CAATATCGTCAACATCCTCGAACGAT +>26873 +TTGGGAGCTGAATCCCGTTACGGTA +>26978 +CGCGCATTAGCTCAAATCTAGTTGGA +>27220 +TGTTTATTTGTCAAGTTTAGATAATA +>27397 +TAGTTCACAGCTGTATGTCCAGATGGGT +>27869 +TATGGTCCAGAATGTAGCCTCGGC +>27878 +TATTAATCGGGCCACAAATATCGGTA +>28035 +GGTATTCTTTGCGAGGTCGTCCTGG +>28154 +AAAGACGAGAACGCGTATATGTGTGC +>28319 +TGACCAACCTTAAAAGATCGGGGT +>28387 +TATTACTATTTCTAAGCTTTGTTTAAA +>28594 +TTTTGAGGTTGGTCAAGAAGTTGTT +>28608 +TAAGATTGAAAATTACTGTGGAGT +>28668 +TACAGATTCTAGAGACAAAGACGC +>28674 +TAAGTACATGCGCCCAGCCGCCGTGA +>28836 +TCTGGTTAAGGTCGGAATACTCGTCT +>29056 +TGCTTTACATACCCTTTGGTGCCC +>29323 +TTTTGCTGCAGAGCTTCTTCCAACGT +>29342 +TATGTATGGATATGTATATTTATGGT +>29387 +TGATAATCGACCTCTTCCATCGTTGT +>29423 +TCTAAGAACTTCTGAGGTGAAGG +>29462 +TGCATTTCAATCGGAAGAGTACTCTG +>29492 +TAAAACACAAATCTCGACATACAGA +>29703 +GTACAGGTTCTGATGACAATG +>29785 +TCAGATGAAAGACAAATTAGAATT +>30053 +TAAGAACTTCTGAGATGAAGGGC +>30096 +TGGATATTGAATGTTTTTGATTTGC +>30136 +TGTAGTCGTCGTATGTCCGGA +>30272 +TGCATTCGTGGATTCGCATTCGAGA +>30497 +TGACAAAATATGCCCTTCACCTCAGA +>30787 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>30836 +TAAATATTTTTTTTGAAACA +>30977 +CGACTGTAATTATTAGCACAATACT +>30985 +GACGATATTGCTGCAATAGACCTTGA +>31000 +TCAGATGAGAGACAAATTAGAAT +>31256 +TAATTCGGAATGCCTGCTCTACT +>31417 +TCAATGATCGCTGTGCTCAGTAGGA +>31506 +TTTGTCTGACGTTAAAAAATATA +>31567 +TCCACATTAGGAGGATTATTAGACAAC +>31790 +AAACATAATAATTGATGGCGGAAGA +>31872 +AAGGTAATCATAGAGCACCACGGTT +>32157 +TTTCTGTGAATTCACATGCTGATGA +>32192 +TTTCTCATGTATAAAATGCTCTGATGG +>32223 +TATCTTGTTATTCTAGTGTCTTTGGTT +>32338 +TGTGGGACTCGAGCCAAAATGGCAACCT +>32497 +TGCGGTTGGACAATTTTTTTTTTATA +>32506 +TTGTTGTTTGGAGGAAGTTCCTTT +>32510 +TCTTCCGCCATCAATTATTATGTTTT +>32522 +TACTTGACTTTTCTATAGAATCTGGT +>32540 +TATCGTCAACATCCTCGAACGATCGAGA +>32626 +TTGATCAAGGTAGGGTTGTCGC +>32646 +TCTGTTAAACACCCCTGAATCGTGGAT +>32657 +TTTGGACATTTTGCAGGTGATACAAT +>32682 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>32716 +TATCTATAGTTCCGATTGGCCATCTC +>32885 +GAAAGTGGGTATCTGTATTTTAGGC +>32967 +TAAAGATACCATCTAACCTCCTTGGA +>33075 +ACAGATATTAGTGTTTTTCAAGCAGC +>33147 +ATCGCAATTATGGCATAACAGATTCGGA +>33163 +TTCAGCGTCGCTTGATTGAATAGAT +>33231 +TAAGAACTTCTGAGGTGAAGGGC +>33242 +TTGGGTTTAGAAATTAAAATTAAGGC +>33244 +TATAGAAAGTTACGAATATATTAGGA +>33248 +TTTTTGATCAATTGGCACCGTGCGAA +>33296 +TGCAGAGCTTCTTCCAACGTTGGCAAG +>33318 +TAGATGTCTGCAGGAATAACGGA +>33338 +TCGACTATTAATGGCTGTTAGAATT +>33475 +CAAACTTATCGACCATCTCCTCAAACG +>33601 +TTACCCTTTTTCCGGAGCGTTTGTGC +>33611 +AATGATCGCTGTGCTCAGTATGACGG +>33662 +TTTGATTCATTACAATTTACGCTGAA +>33737 +CAGATATTAGTGTTTTTCAAGCAGC +>33747 +TACATGTAAAGCAGCTGTGTGTGC +>33815 +AGTTTTTGGAATCACTTGA +>34032 +TGCGGACGTGTGCTCGCTGCGTGA +>34200 +TAATATATGTATAATCTGCTTGGTG +>34240 +TCTTTACGCCATATAAATCATTTCGA +>34259 +ACAAATCATAAATTTGATGGGACGA +>34268 +TCTTGTGGACTTCACTCTAG +>34297 +TAAGTAAATAGTCCCCGCCTTATTGAGG +>34337 +TGGATAATGAATGTTTTTGATTTGC +>34569 +TCTAGGTTCTTCTGAAATCGTGGGA +>34643 +TCGTGTAGACCGGATAAGATTTTTT +>34697 +TCTAATACTGTGAAAGGGTGGGG +>34808 +GTAGGATGTGCTCTGCGGTTTCCAC +>34892 +TCTGTCGCAGTTGTAGCTTGCAATA +>34896 +TGTAGTTGCCACTTATGCTGTCCA +>35151 +TAGCAATGTCCGTCTGTCCGTATGA +>35196 +TGTAAATGGTCAGCGAAAGCAAAGG +>35211 +CAGATATTAGTGTTTTTCAAGCAGCGG +>35246 +CCTCAGAGAACGTCAGACCGCG +>35272 +TGCAGAGCTTCTTCCAACGTTGG +>35273 +TCTGGTAGTAAGAAAAATGTAGCTT +>35286 +TATGTATGGATATGTATATTTATGGGT +>35317 +TTTTGGTTTGATCGTCAGGTGGTC +>35512 +GACGATAATAGTGAATTTTGGACA +>35566 +TTCGAATTCGCGCCATTTCACAATC +>35948 +TGGCCTGTATACGCTTTCTGTTG +>36009 +TTGCAAAAGTCATATCTTGAGG +>36059 +TAAGTTACTATGGATCCATAAGGGTA +>36100 +TGATATGGGACTTGTAGCTTTTTTAAA +>36121 +TGTCGTGGGCTGTGCGTTTGAGAA +>36235 +TGCTACCTTTAGCTGCAAGATTAACT +>36238 +CAGATATTAGTGTTTTTCAAGCAGCGG +>36523 +TGCGAGTCCGAGCTTTGAACGTGGG +>36607 +TCTCAATGTAATGTCTTCTTTTTGGA +>36624 +CGTTCTTTAAAACCACCAATGGGA +>36776 +TACTCACGACCATTGTCATTTCTCAAG +>36825 +CGTTCGAGGTCCACTTTCTTAGCGGA +>36850 +TCTGATGTCGGCGAGAAAGGAGTCTCA +>36872 +TCAAGGCCAGCACACAGTAACATGGT +>36968 +ACATTGGAAATACCGCGGGACCGC +>36994 +TATACGTTTTTTGACCTCTTCTCTTTGA +>37110 +TGATAATGTAAACAAAGATAAAGGG +>37210 +TCAGATGAAAGACAAATTAGAATTAAA +>37224 +GACGATAATAGTGAATTTTGGACA +>37226 +TCTGAAGTTGCCGCACTAGAGATGG +>37339 +TATACAGTCCACTATATCGTTGTTTAA +>37395 +TACAATTTACGCTGAATTTAAATGAA +>37460 +CACAAAGTAACGTGCACCACCATTT +>37766 +TCTGTGCAACTTTGTACGCGAAGTCAGC +>37836 +TCAAGGATTAATGTAGGGGGGGGG +>38096 +TTTGGAGGTAGAAGTCTTAGTGGCCGC +>38100 +GACAAAATATGCCCTTCAATTTAGA +>38113 +TATGGTCCAGAATGTAGCCTCGGC +>38213 +TAACTGTTTTTAGTTCAAAGTCTCGAA +>38224 +TTTCAGTTTTTATTGTTAGTCACAGG +>38263 +TAACTGCGAAGTCGATCAGGTCCGA +>38266 +ATCCGGACGATTGACGAGGAGCCCATT +>38271 +TTATGTCAGTGTCGAAGGCGATCGAA +>38373 +TCTATAGCCTTGGCGTAGGAACTCGCA +>38384 +GCTACAACTGCGACAGAAAATTCGGA +>38468 +TATTATCGTCCGCTGCTAAACTGC +>38491 +TGCATACTTCCGTTCTCTTTTCGGGA +>38673 +TTGTGTATTGTGATTCTGATTCGTG +>38745 +TCCCTGGCCCGCTAGACAGCAGGA +>38784 +TGTGCTAATAATTAAAGTCGACTGA +>38873 +TGTCATACTTTCGTCAAAA +>39154 +TAATAGTCAGGGCGCGAATTTTTAAAA +>39325 +TTAGGACTTATTGAACTTTACGGTA +>39359 +TATGCAAATCAAGTGTGACCGTAGCT +>39450 +TTAAACAACGATATAGTGGACAGTA +>39453 +TCTGCCGGATTTTGATCCAATCAAGG +>39819 +GTGGAATTGAAAAAGAACCAGACACA +>40040 +TTTTTGGAACTACCTGAGTCGGTT +>40183 +TAACACAAAGCAGTATGATTTAATAAT +>40189 +CAGCAAGCTGAGATGTACATTAGTATA +>40374 +TACGTTTTCTTGCAGATCAAAAA +>40445 +CGTTCTTTAAAACCACCAATGGGA +>40467 +TACGCAGATTCCTGGGAGTTACAGGA +>40488 +TGATTTGGGCTTGCATACTTGTACT +>40797 +TATATTCGTGTTCATGTGTGAACAGC +>40831 +TCTAAGAACTTCTGAGGTGAAGGGC +>40951 +TTCTTCGTAAGTCAAAATAGTGTCGCC +>40988 +TATGATTGATTGCTTGAGAGT +>40998 +TGATAGAGCTGCATTTGAATTAACGG +>41061 +CTCTTTCCGCTCACTCCCGCTGAGA +>41076 +TGCATTAAGAAGATTTAGGATCC +>41140 +TCAAGGATTAATGTAGGGGGGG +>41143 +CAATAGCGTCGCTGAGTAACAGTG +>41162 +TACGGATTGCAGCGGCTAG +>41183 +TTTTTTGGCACACGATTTTTTGGACGT +>41227 +TAATATATGTATACTCTGCTTGGTG +>41265 +TTTATGATTTTTGGTAATA +>41365 +TAGGAGGGTTCCACAACTATTTCGGGG +>41837 +TAGTTTGACACTGTTTGGAGACGTGG +>42053 +TCTGTGGTCGAATCGAAGGAGTGC +>42113 +TAATAGATCGCTCACCTGTTCCTGG +>42392 +CGGCATCGGAAAACTCCCAGCGGGGC +>42504 +TAACGTTATTATTATTTGAAAATAGAA +>42566 +TTTGTTGGGTAGGAACTTTACTGC +>42668 +TGCTGCAGAGCTTCTTCCAACGTTGG +>42751 +CCAAAGTCTGGTTGTCAGAAAATGTGC +>42777 +AAGGAAGGAACCAAAGAAGCACAAACG +>42804 +TAATTCTAATTTGTCTCTCATCTGA +>42830 +TAGTATACTTATTAAGTCATTTGA +>42857 +AGAGTATTCATCTTGAGGCGTGTG +>42886 +TGAACAACGATTTATGTATATAAGAA +>42899 +TACGATAATAGTGAATTTTGGACA +>42936 +TTTGACTAAAAGTCGCTTGTTTTGGA +>43050 +AATACATAACTCTGGACACAGGAGA +>43219 +TACTTTCGTCAAAATGTTCAGGAGCT +>43242 +TGACATGTCTATTTCCATGGGTTCGGA +>43268 +GTGTATAAAAAAATTTATTGTTGAGCA +>43299 +TCTGGATGATGGCTGATGCTCGTTG +>43491 +TCTGATGACAATGAATTTTTTAGACA +>43520 +TATTGTTGTAATTGCTGCCTCGGTTG +>43654 +ATATGAACAAAGCAAAGACACTAGAA +>43677 +CACGATTCATAACCCTCAGCTGAAG +>43776 +GCTCCTGGCAACTCTGTGATGGAC +>43903 +TATTTTGATGTTTAATGAA +>43990 +TTTCTACTTCGTATTATTTTTATGA +>44062 +AAACATAATAATTGATGGCGGAAGA +>44071 +CACAGACGCAGTGGAAACCGCAGA +>44078 +CGTGCGTCCGAGCAAAAGGTGGT +>44187 +TTATGTAAGAATATTTGTCATTAGA +>44239 +TATCAATGTTGACCGTAATACTCAA +>44253 +TGGATATTGAATGTTTTTGATTTGC +>44279 +TGTGATTTTCCCAATTTATATTAATACA +>44374 +TAGTCGGAGTTGATGAGCTGCC +>44412 +TCCGAATTTTCTGTCGCAGTTGTAGCT +>44423 +TCGGCTTGGGTTTAGAAATTAAAAT +>44558 +ACAAATCATAAATTTGGTGGGA +>44637 +TTTTTTATCAATTGGCACCATGCGAA +>44642 +TACTGTGTGCTGGCCTTGATGAAAGT +>44688 +TAGATGTCTGCACGAATAACGGA +>44945 +TGCTTCCGAGCAATCTACGTTGGTAAAA +>45054 +TCTCATCTGACAATTTTTTAAAAGCGA +>45088 +TCTGAAGCAGCGCTCACGGCAGAATGC +>45203 +TGCCGGATTTTGATCCAATCAAGGGA +>45263 +TATTGATTTTCCTATTTAGTTGAACA +>45274 +AAAAGTCTGGATATTGTAGGATAGGA +>45358 +CAACGCTGGACCTTGGACTCGAGGGC +>45396 +CATCTTGTTATTCTAGTGTCTTTGGTA +>45641 +GAACTAAGATCAGTCGACTGTAATTAT +>45774 +CGACGAACTAGCAGCTCTGGTGT +>45911 +CAGATATTAGTGTTTTTCAAGCAGC +>45997 +TTAACCAGTCGGCGTTGTTTAAGTAGC +>46031 +AAAGCGTCTACTTGAACAATGAGA +>46054 +TTAGATCGTATTACTTGGGTGCTGG +>46199 +TAAGCGTTAGGTCGTATTACTTGGGC +>46255 +TAAGATTGAAAATTACTGTGGAGTAAT +>46366 +TAAGGAATTGTCGGCCATTTAATGTGA +>46387 +CAGAGCTTCTTCCAACGTTGGCAAG +>46701 +TTGTATCTTTTTGCTTTTTATATT +>46705 +TAACTGTTTTTAGTTCAAAGTCTCGGA +>46783 +TCTCATCTGACAATTTTTTAAAAGCGA +>46836 +TCATAAGGACAGACGGACAT +>46892 +AAACATAATAATTGATGGCGGAAGA +>47244 +TCTCTGTCCGCTCGCTTACGATGAGA +>47290 +TTTCAGGAATGGGGTCGTCCCACTA +>47348 +TGCAAAAGTAATATCAAAGACACTA +>47359 +TTTAAAATTATAATAGTCAGGG +>47427 +CATCTTGTTATTCTAGTGTCTTTGGT +>47654 +TTAAACACTGAATTCGGTTTCGAAA +>47656 +TTTCTGAAGAATCCTGTAACTCCC +>47785 +TGTAGATGAGCGGCAAATGTGG +>47827 +TTGATCAAGGTAGAGTTGTCGCGC +>48184 +TTCAAGGATTAATGTAGGGGGGG +>48576 +TTAACCCGGAGACTTGGGTGTGGGT +>48764 +TGCAAAAGTAATATCAAAGACAATAGA +>48782 +TGACAATGTAGTGAACGCCAGTGT +>48893 +TATAAATGCCGTCTGATATTATTAAA +>49017 +ACCGGATGTCATATCCAGCGTCGTGAA +>49381 +TACAATGTAAATTCGTTTCTTCGATCA +>49456 +TTTTGGTTTGATCGTCAGGTGGACGC +>49484 +TCTGCTATCATTGACTCGATCATTGA +>49569 +TCGATTGTATGATCAGTGGAAGTGGC +>49602 +GAAATTGGCCAACATTAATTCGGAA +>49628 +TACTTTCGTCAAAATGTTCAGGAGC +>49715 +TTTGTCCGGGTGCTTCGAAAGAACTCT +>49778 +CAATAGCGTCGCTGAGTAACAGTG