Repository 'sr_bowtie_dataset_annotation'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/sr_bowtie_dataset_annotation

Changeset 4:e11f91575af6 (2019-03-20)
Previous changeset 3:008de522b3ea (2019-02-10) Next changeset 5:279fdd92a615 (2019-03-24)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit 618a7892f6af26278364a75ab23b3c6d8cdc73db
modified:
barplot.r
sr_bowtie_dataset_annotation.xml
test-data/sample1_output.pdf
test-data/sample1_output.tab
test-data/sample_output.pdf
test-data/sample_output.tab
added:
test-data/multisample5_output.pdf
test-data/multisample5_output.tab
test-data/sample2.fa
test-data/sample3.fa
test-data/sample4.fa
test-data/sample5.fa
b
diff -r 008de522b3ea -r e11f91575af6 barplot.r
--- a/barplot.r Sun Feb 10 18:31:51 2019 -0500
+++ b/barplot.r Wed Mar 20 07:12:53 2019 -0400
[
@@ -11,8 +11,7 @@
 warnings()
 library(optparse)
 library(ggplot2)
-library(scales)
-
+library(ggrepel)
 
 
 #Arguments
@@ -37,12 +36,20 @@
 
 ## 
 annotations = read.delim(opt$input, header=F)
-colnames(annotations) = c("class", "counts")
-annotations = cbind(annotations, fraction=annotations$counts/annotations$counts[1])
-annotations = annotations[-1,]
+colnames(annotations) = c("sample", "class", "percent_of_reads", "total")
+annotations$percent=round(annotations$percent_of_reads/annotations$total*100, digits=2)
 # ggplot2 plotting
-ggplot(annotations, aes(x="classes", y=fraction, fill=class)) +
-geom_bar(width = .7, position=position_stack(), stat = "identity") +
-geom_text(aes(label = percent(fraction)), position = position_stack(vjust = 0.5),size = 4)
 ggtitle('Class proportions') 
+ggplot(annotations, aes(x=total/2, y = percent_of_reads, fill = class, width = total)) +
+       geom_bar(position="fill", stat="identity") + 
+       facet_wrap(~sample, ncol=3 ) + geom_label_repel(aes(label = percent), position = position_fill(vjust = 0.5), size=2,show.legend = F) +
+       coord_polar(theta="y") +
+       labs(x = "Class fractions (%)") +
+       theme(axis.text = element_blank(),
+             axis.ticks = element_blank(),
+             panel.grid  = element_blank(),
+             axis.title.y = element_blank(),
+             legend.position="bottom") +
+       geom_text(aes(x = total/2, y= .5, label = paste(round(total/1000000, digits=3), "M"), vjust = 4, hjust=-1), size=2)
 ggsave(file=opt$barplot, device="pdf")
+
b
diff -r 008de522b3ea -r e11f91575af6 sr_bowtie_dataset_annotation.xml
--- a/sr_bowtie_dataset_annotation.xml Sun Feb 10 18:31:51 2019 -0500
+++ b/sr_bowtie_dataset_annotation.xml Wed Mar 20 07:12:53 2019 -0400
[
b'@@ -1,73 +1,66 @@\n-<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.1.0">\n+<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.2.0">\n   <description>by iterative alignments with sRbowtie</description>\n   <requirements>\n         <requirement type="package" version="1.1.2">bowtie</requirement>\n-        <requirement type="package" version="1.3.2">r-optparse</requirement>\n-        <requirement type="package" version="2.2.1">r-ggplot2</requirement>\n-        <requirement type="package" version="0.4.1">r-scales</requirement>\n+        <requirement type="package" version="1.6.0">r-optparse</requirement>\n+        <requirement type="package" version="3.1.0">r-ggplot2</requirement>\n+        <requirement type="package" version="0.8.0">r-ggrepel</requirement>\n   </requirements>\n   <command  detect_errors="exit_code"><![CDATA[\n         #if $refGenomeSource1.genomeSource == "history":\n             bowtie-build -f $refGenomeSource1.ownFile genome  1>/dev/null &&\n-            ln -s -f \'$refGenomeSource1.ownFile\' genome.fa &&\n             #set index_path = \'genome\'\n         #else:\n             #set index_path = $refGenomeSource1.index.fields.path\n         #end if\n-        #if $input.is_of_type(\'fasta\'):\n+        #set method_prefix = "-v %s -k 1 --best" % str($mismatches)\n+        #if $input[0].is_of_type(\'fasta\'):\n             #set format = "-f"\n-        #elif $input.is_of_type(\'fastq\'):\n+        #elif $input[0].is_of_type(\'fastq\'):\n             #set format = "-q"\n         #end if\n-        #if $format == \'-f\':\n-            input_nbr_read=\\$(( \\$(wc -l < $input)/2)) &&\n-        #elif $format == \'-q\':\n-            input_nbr_read=\\$(( \\$(wc -l < $input)/4)) &&\n-        #end if\n-        #set method_prefix = "-v %s -k 1 --best" % str($mismatches)\n-        bowtie -p \\${GALAXY_SLOTS:-4}\n-               $method_prefix\n-               --al matched.fa\n-               --un unmatched.fa\n-               --suppress 6,7,8\n-               $index_path $format \'$input\' > tabular_bowtie_output.tab &&\n-        genome_aligned=\\$(wc -l < matched.fa) &&\n-        genome_aligned=\\$(( \\$genome_aligned/2)) &&\n-        #if $refGenomeSource1.genomeSource == "history":\n-            echo -e "$refGenomeSource1.ownFile.name\\t\\${genome_aligned}\\n" > $output &&\n-        #else:\n-            echo -e "$refGenomeSource1.index.fields.dbkey\\t\\${genome_aligned}\\n" > $output &&\n-        #end if            \n-        #set counter = 0\n-        #for $i in $AdditionalQueries:\n-            rm -f genome.fa &&\n-            #set $counter += 1\n-            #if $counter != 1:\n-                #set input = "class_unmatched.fa"\n-            #else:\n-                #set input = "matched.fa"\n-            #end if\n-            touch temp_class_matched.fa temp_class_unmatched.fa &&\n-            bowtie-build -f $i.ownFile genome  1>/dev/null &&\n-            ln -s -f \'$i.ownFile\' genome.fa &&\n-            #set index_path = \'genome\'\n+\n+        #for $file in $input:\n+            #set sample=$file.element_identifier\n             bowtie -p \\${GALAXY_SLOTS:-4}\n-                $method_prefix\n-                --al temp_class_matched.fa\n-                --un temp_class_unmatched.fa\n-                --suppress 6,7,8\n-                $index_path $format \'$input\' > tabular_bowtie_output.tab &&\n-            class_aligned=\\$(( \\$(wc -l < temp_class_matched.fa)/2)) &&\n-            class_unaligned=\\$(( \\$(wc -l < temp_class_unmatched.fa)/2)) &&\n-            mv temp_class_unmatched.fa class_unmatched.fa &&\n-            echo -e "$i.ownFile.name\\t\\${class_aligned}\\n" >> $output &&\n+                   $method_prefix\n+                   --al matched.fa\n+                   --un unmatched.fa\n+                   --suppress 6,7,8\n+                   $index_path $format $file > tabular_bowtie_output.tab &&\n+            genome_aligned=\\$(wc -l < matched.fa) &&\n+            genome_aligned=\\$(( \\$genome_aligned/2)) &&\n+            #set counter = 0\n+            #for $i in $'..b'  bowtie -p \\${GALAXY_SLOTS:-4}\n+                    $method_prefix\n+                    --al tmp_class_matched.fa\n+                    --un tmp_class_unmatched.fa\n+                    --suppress 6,7,8\n+                    subgenome $format \'$to_align\' > tabular_bowtie_output.tab &&\n+                class_aligned=\\$(( \\$(wc -l < tmp_class_matched.fa)/2)) &&\n+                class_unaligned=\\$(( \\$(wc -l < tmp_class_unmatched.fa)/2)) &&\n+                echo -e "$sample\\t$i.ownFile.name\\t\\$class_aligned\\t\\${genome_aligned}" >> $output &&\n+                mv tmp_class_unmatched.fa class_unmatched.fa &&\n+                rm tmp_class_matched.fa &&\n+            #end for\n+            remaining=\\$(( \\$(wc -l < class_unmatched.fa)/2)) &&\n+            echo -e "$sample\\tNot classified\\t\\${remaining}\\t\\${genome_aligned}" >> $output &&\n         #end for\n-        remaining=\\$(( \\$(wc -l < class_unmatched.fa)/2)) &&\n-        echo -e "Not classified\\t\\${remaining}\\n" >> $output &&\n+        \n+        \n         Rscript $__tool_directory__/barplot.r --input $output --barplot $barplot\n         ]]></command>\n   <inputs>\n-      <param name="input" type="data" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/>\n+    <param name="input" type="data" multiple="True" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/>\n     <param name="mismatches" type="select" label="Number of mismatches allowed" help="specify the number of mismatches allowed during alignments">\n         <option value="0">0</option>\n         <option value="1" selected="true">1</option>\n@@ -99,7 +92,7 @@\n    <outputs>\n        <data format="tabular" name="output" label="Cascade Annotation Analysis">\n            <actions>\n-               <action name="column_names" type="metadata" default="Reference Index,Number of reads" />\n+               <action name="column_names" type="metadata" default="Sample,Reference Index,Number of reads, Total reads" />\n            </actions>\n         </data>\n         <data name="barplot" format="pdf" label="barplot from ${on_string}" />\n@@ -112,7 +105,7 @@\n             <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />\n             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />\n             <output name="output" ftype="tabular" file="sample1_output.tab" />\n-            <output name="barplot" ftype="pdf" file="sample1_output.pdf" />\n+            <output name="barplot" ftype="pdf" file="sample1_output.pdf" compare="sim_size" delta="500"/>\n         </test>\n         <test>\n             <param name="input" value ="sample.fastq" ftype="fastq" />\n@@ -121,7 +114,16 @@\n             <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />\n             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />\n             <output name="output" ftype="tabular" file="sample_output.tab" />\n-            <output name="barplot" ftype="pdf" file="sample_output.pdf" />\n+            <output name="barplot" ftype="pdf" file="sample_output.pdf" compare="sim_size" delta="500"/>\n+        </test>\n+        <test>\n+            <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" />\n+            <param name="genomeSource" value="history" />\n+            <param name="ownFile" value ="2L-tail.fa" ftype="fasta" />\n+            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />\n+            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />\n+            <output name="output" ftype="tabular" file="multisample5_output.tab" />\n+            <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" />\n         </test>\n     </tests>\n   <help>\n'
b
diff -r 008de522b3ea -r e11f91575af6 test-data/multisample5_output.pdf
b
Binary file test-data/multisample5_output.pdf has changed
b
diff -r 008de522b3ea -r e11f91575af6 test-data/multisample5_output.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/multisample5_output.tab Wed Mar 20 07:12:53 2019 -0400
b
@@ -0,0 +1,15 @@
+sample5.fa dme_miR21_hairpin.fa 0 2126
+sample5.fa Ensembl_transposon_set.fa 1920 2126
+sample5.fa Not classified 206 2126
+sample4.fa dme_miR21_hairpin.fa 0 1676
+sample4.fa Ensembl_transposon_set.fa 1510 1676
+sample4.fa Not classified 166 1676
+sample3.fa dme_miR21_hairpin.fa 0 1900
+sample3.fa Ensembl_transposon_set.fa 1722 1900
+sample3.fa Not classified 178 1900
+sample2.fa dme_miR21_hairpin.fa 0 2317
+sample2.fa Ensembl_transposon_set.fa 2095 2317
+sample2.fa Not classified 222 2317
+sample1.fa dme_miR21_hairpin.fa 0 2954
+sample1.fa Ensembl_transposon_set.fa 2689 2954
+sample1.fa Not classified 265 2954
b
diff -r 008de522b3ea -r e11f91575af6 test-data/sample1_output.pdf
b
Binary file test-data/sample1_output.pdf has changed
b
diff -r 008de522b3ea -r e11f91575af6 test-data/sample1_output.tab
--- a/test-data/sample1_output.tab Sun Feb 10 18:31:51 2019 -0500
+++ b/test-data/sample1_output.tab Wed Mar 20 07:12:53 2019 -0400
b
@@ -1,8 +1,3 @@
-2L-tail.fa 2954
-
-dme_miR21_hairpin.fa 0
-
-Ensembl_transposon_set.fa 2689
-
-Not classified 265
-
+sample1.fa dme_miR21_hairpin.fa 0 2954
+sample1.fa Ensembl_transposon_set.fa 2689 2954
+sample1.fa Not classified 265 2954
b
diff -r 008de522b3ea -r e11f91575af6 test-data/sample2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample2.fa Wed Mar 20 07:12:53 2019 -0400
b
b'@@ -0,0 +1,38818 @@\n+>30592\n+AAAACGGTTTGTGTCTCTGCTGAGCTT\n+>30593\n+GGTGAATTTCCGATTTCAAGT\n+>30594\n+TGGACGGAGAACTGATAAGGGC\n+>30595\n+TGGCAGTGTGGTTAGCTGGTTG\n+>30596\n+TGGACTTCACATTTCTTAGGTGTG\n+>30597\n+TTAGATTAAGATTGATGGTATGAACC\n+>30598\n+TAAAATTGTTCCAATGTAAATTCGAA\n+>30599\n+TGTGATGTGACGTAGTGGAAC\n+>30600\n+GGGGATGTAGCTCAGATGGTAGAGCCT\n+>30601\n+TTTTTTTGCCGCCAGCATATCGCTTA\n+>30602\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30603\n+TTAGATCAGATTTGTGGAAAATT\n+>30604\n+TGTACGTTGGTCTTCTTTGAA\n+>30605\n+TATTGTACTGTGAAACTGATGGTT\n+>30606\n+GACTCTTGCCTCCAGCATCCACTCAGA\n+>30607\n+TCAGAATTTGAATGGTCAAGTCGGACT\n+>30608\n+TATTGACTGGCGACGCTTTTGAGG\n+>30609\n+TTCTGATGTCGGTCACATGCTTGGA\n+>30610\n+TTAATTAATAGTATAGATACCA\n+>30611\n+TCTTTGGTATTCTAGCTGTAGA\n+>30612\n+TGATTATTTTAGTTATTTCTTTAGGA\n+>30613\n+AATGGCACTGGAAGAATTCACGG\n+>30614\n+TGCTTGGACTACATATGGTTGAGGGT\n+>30615\n+TATTAGTCAGATACGCCAACAACATGA\n+>30616\n+TTATATGGTTTGTTTTGCGTTGTGGAA\n+>30617\n+TATTTGCACCTCTTCGATAAAACAGTA\n+>30618\n+TACATTGAACTGGATTGATACGTT\n+>30619\n+TTGTTATGTACTTTACTATTTTGGGA\n+>30620\n+TTAGACATGAATGCTATCGGACGTT\n+>30621\n+TGTGTAATTTAGTTTTTCCAGGAGCGG\n+>30622\n+TCTTTGGTATTCTAGCTGTAG\n+>30623\n+CAGATCAACAAAGTACGATGGGGA\n+>30624\n+TTATGGACAATGTTTTGGGTGACCTAG\n+>30625\n+TGTGATGTGACGTAGTGGAACA\n+>30626\n+TCTCGGTAGTGCCTTTAGGGTGGA\n+>30627\n+AAAGGTTGAGCCTATGTAGAACA\n+>30628\n+TGTGATGTGACGTAGTGGAAC\n+>30629\n+TTGAGCTGTACTACGCTAATATGGGC\n+>30630\n+TTTTGTGTTGTTTACTGTTGTTCGAG\n+>30631\n+TCGCGAATACTGTACAGCAGT\n+>30632\n+TAGTGCTTTCTATGTCGGAAATGGATT\n+>30633\n+TTATTGTGTTTGAATGTGTTTATGT\n+>30634\n+GGGCACGTACGGATACCTTGTCGGA\n+>30635\n+CCTCCTCGAGCTCTACCTGATGG\n+>30636\n+TCACTGGGCTTTGTTTATCTC\n+>30637\n+CATGTTACTGTCGAAAACCC\n+>30638\n+TTTCGGACTGCTTCTTAAGGAATTGAA\n+>30639\n+TGGACGGAGAACTGATAAGGGC\n+>30640\n+TTATACCTCATAGACTGGTCGCAAA\n+>30641\n+ATTAAGATATAATTTTGGGT\n+>30642\n+TTTTAAAAATATCGAATTGAAATATT\n+>30643\n+TACATGCAAGCGTCGACCATAATG\n+>30644\n+TTTCTTGGAATTCAGGTGGGA\n+>30645\n+TGTGATGTGACGTAGTGGAAC\n+>30646\n+TCGACGGACATTCCAGATAAGGGGGGC\n+>30647\n+TGCTTGGACTACATATGGTTGAGGGTTGT\n+>30648\n+TAGTGGTTTATCGGCAGTACGCCGGA\n+>30649\n+CTCAATGGTCTAGGGGTATGATTCT\n+>30650\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30651\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30652\n+TCTGCATCATCAGGTATACGTCGTC\n+>30653\n+TGAAACGATGTGGCCAAGGAATTCTG\n+>30654\n+TGCTTGGACTACATATGGTTGAGGGTTG\n+>30655\n+AAGGATTGGCTCTGAAGATTGAGAT\n+>30656\n+TTTTAAGTGATACAGAACTCGATGAAG\n+>30657\n+TGGACGGAGAACTGATAAGGGC\n+>30658\n+TGACTAGATTTCATGCTCGTCTA\n+>30659\n+TGTGATGTGACGTAGTGGAAC\n+>30660\n+TCTGGGTGTTCTTTGAGATTTGGAA\n+>30661\n+TCTTTGGTATTCTAGCTGTAG\n+>30662\n+TATTGCACTTGAGACGGCCTGAAAA\n+>30663\n+GGGGACGTAGCTCAGTGGTAGAGC\n+>30664\n+TAAGACTTTAGAAGTTTGTGTGTG\n+>30665\n+TGTGATGTGACGTAGTGGAAC\n+>30666\n+GGTTCTATGGTGTAATGG\n+>30667\n+TTATTGAGGCGACCAGACTGAGTCCT\n+>30668\n+TGTAAGATCTAATTTAGGGGA\n+>30669\n+TGCTGTGCATCCTGCAAGTAGTTGTA\n+>30670\n+TACATCTCTACACCCCCTCTCCA\n+>30671\n+TCTGAAGGTGTAGTATTTGGTCGA\n+>30672\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30673\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30674\n+TTCGGACTGATTGTTATGGTGGGCA\n+>30675\n+TGTGATGTGACGTAGTGGAAC\n+>30676\n+TGCGATCCGAGTTCAGATCT\n+>30677\n+TGTGCGTAGTCTGTGGAGATCTCC\n+>30678\n+TTAATTGTATTATTTGTTATTTTGGGT\n+>30679\n+TCCCATATTGTCTAGTGGTTAGGATA\n+>30680\n+TGTTGTCATTATCTCCTATTTCTGA\n+>30681\n+TATTGGGCCATCTCGCTGTATTGAA\n+>30682\n+TTCCTACGAATCGCTGTATGAACAGT\n+>30683\n+GCTAACTTTTTTCTTTTAATGG\n+>30684\n+AGCAAGTACTGGTCTCTTAAA\n+>30685\n+TGAGACTGAAATTTATATAGATGT\n+>30686\n+TGAACACAGCTGGTGGTATCC\n+>30687\n+TCTTTGGTATTCTAGCTGTAG\n+>30688\n+TCAGGTACTTAGTGACTCTCAC\n+>30689\n+TTCTTCGCAGAATGCTCTTAGGCT\n+>30690\n+TCAATCCAAGGTATTTCGGG\n+>30691\n+TGTTATGGTTGCGGAACTCCGGAGGTG\n+>30692\n+TGTGATGTGACGTAGTGGAAC\n+>30693\n+GCTGATGTCGTTGTTGTTGCTGCT\n+>30694\n+CTGCTTGGACTACATATGGTTGAGGG\n+>30695\n+AACGAACGAGACTCAAATATATTAAATA\n+>30696\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30697\n+GCTTCAGGAACCAGATCATTGTGGA\n+>30698\n+TTCGTTTTGTCGGTGTTTGGCTTAAGG\n+>30699\n+TGTCACGGTCGCCATGTAGTTAATA\n+>30700\n+ACAACAATTAATAAGATCAAGAAAT\n+>30701\n+GAGGAAAGTAGCAGGTGTTGAG\n+>30702\n+TGTGATGTGACGTAGTGGAAC\n+>30703\n+TTTAGAAAATTTTAAAGGTATGTGA\n+>30704\n+GAACGGCCTCTAGTGCAGATCTTGGT\n+>30705\n+TCAGGTACTTAGTGACTCTCAAA\n+>30706\n+GCTATGCTCTCAACTGCGCGCGCGGT\n+>30707\n+AGTTGATTGAAGATCT'..b'CCACATGATTCGGCTT\n+>49886\n+ATACATTGTAGACGGTCTTACGGGA\n+>49887\n+TAAACTAATAGAGACAGGTAGAATC\n+>49888\n+TTGTTGCAATGTCTGACTGGGGTTCGT\n+>49889\n+TGCTTGGACTACACATGGTTGAGGGTTGTA\n+>49890\n+GGGTCAGGCGATGATGAATT\n+>49891\n+TGTGATGTGACGTAGTGGAACA\n+>49892\n+TTGGGATATTGTTGGAAATGATTTTT\n+>49893\n+TATTCGAGAATTTTGTGATTAGTGA\n+>49894\n+TTGTGGAATGTTCGTGTCGAA\n+>49895\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49896\n+TGGAAGACTAGTGATTTTGTTG\n+>49897\n+TGCGACGGCGGCGAACGTAGTA\n+>49898\n+TGTGATGTGACGTAGTGGAA\n+>49899\n+TGATGACGAGCGTTCTTTTAGA\n+>49900\n+TCAGGGTGACCACACATTTCAAGGA\n+>49901\n+TGGAATTTCGTTGTGTCGTCAGTGTGA\n+>49902\n+AAGGACCCGAGGGCTGCAACCTTTTC\n+>49903\n+TTAGATAACTGAAAGCAAGTACTGG\n+>49904\n+AAGAAGCCGTCGAGAGATATCGGA\n+>49905\n+TGTTATCGATCATTTTAGTTCGCTGA\n+>49906\n+TCCCATATTGTCTAGTGG\n+>49907\n+TACATGGTTGTCTTGTAGAGTTGACGC\n+>49908\n+TGAGCGGAGAACCAGAGTTGATGTG\n+>49909\n+TATAGGTCTGATTCTAAAATGGGTGA\n+>49910\n+GGACTGACTCGTGTAGTGTGCACT\n+>49911\n+TGTTTTTCGGCCTTATAAACGGGG\n+>49912\n+CCTAATAAGAATTGAGGGATCAGGA\n+>49913\n+TATTTGTGCTGCCTCCTCTGAAATCA\n+>49914\n+TGTTATGTTGCCAGTCTGAGTCGTCAG\n+>49915\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49916\n+CAATATCGTCAACATCCTCGAACG\n+>49917\n+TGTGATGTGACGTAGTGGAAC\n+>49918\n+TTTAAGCTGGTAGGTGTAGAAGCCT\n+>49919\n+TTCTTTCGGATTCTGAAGTAATTAAT\n+>49920\n+GAAGATGAAACTGTTCTGGACGGA\n+>49921\n+TCACTGGGCTTTGTTTATCTCA\n+>49922\n+TCAGGTACTTAGTGACTCTCAA\n+>49923\n+ATCGAAAAGATTCGCTGAAGTTGGGC\n+>49924\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49925\n+TAGATGTAAGAATAACTGTTTTGA\n+>49926\n+TAGCAACCAGGTCATCTTCAAACT\n+>49927\n+TTTATTGGAGAGGTTGATCCT\n+>49928\n+GAAGGGTTCGGGCTCAATTAGAGGGT\n+>49929\n+CATGGGTTCTGATGTGTTTTCACGA\n+>49930\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49931\n+TCCATTTTTGAACACAGTTTGATGGGA\n+>49932\n+TAACTGAAGTATCTGAGGATTGGATT\n+>49933\n+TTTGAACTGGTCTGTGTTGAATTCAACC\n+>49934\n+GACGTTAGAAATCCGTTGGTGGA\n+>49935\n+TGCTTGGACTACATATGGTTGAGGG\n+>49936\n+TGGACTCGTTAGGTATGGATGTTGC\n+>49937\n+TGAGTCCCACAATACTGTATATA\n+>49938\n+TAAGACTATAATTGATGGAATGAACT\n+>49939\n+TGTTATACTCAGATACAGACGGTTCGA\n+>49940\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49941\n+TCTGAGTTTATTTTTAAAAGGTGTT\n+>49942\n+TCAGGTACTTAGTGACTCTCA\n+>49943\n+TCAGGATTGCTGAGCTGTGCGGTA\n+>49944\n+GGGGATGTAGCTCAGTGGA\n+>49945\n+AATATCATCTATTCTCGGTAGTGGA\n+>49946\n+GGTTCCATGGTGTAATGG\n+>49947\n+TGCGAGGTCAATGGTTGTAAAGTA\n+>49948\n+TCTTTGTAGTCGGTTGGCTGTACAGGT\n+>49949\n+GGCACTTGAACTTGGCACTGGACGC\n+>49950\n+GGGGACGTAGCTCAGTGGGG\n+>49951\n+TGCATAAGATATTCCGCCTCGAGAAGA\n+>49952\n+TGGACGGAGAACTGATAAGGG\n+>49953\n+TATTAACCAGACTGCAAATATACT\n+>49954\n+AACAATTAGGACCACGGCGATGGT\n+>49955\n+TTGGTGTAATCTTGGATCGGAGAC\n+>49956\n+CTAACTGTGAAAGGGGAATTGACCGGC\n+>49957\n+TGTAAAAGGGTGTCTCACTGCGGC\n+>49958\n+TTCTTATGGATTCTGAGTGATGTG\n+>49959\n+TTACAAAGTTGTAGATTGGTCGGGG\n+>49960\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49961\n+TAAATTGATTTAGTTTGAATTTAGA\n+>49962\n+TCTTTGGTATTCTAGCTGTAG\n+>49963\n+TAATACTGTCAGGTAAAGATGTCA\n+>49964\n+TATGCCGTAAGCTTGTTGCGCGCGGA\n+>49965\n+ACTTTCGGACTGATTGTTATGGTGG\n+>49966\n+TAATTGAGTACAGTTGGACATAGC\n+>49967\n+GCGGAGGTTGCGGTGCTGTACACTG\n+>49968\n+TTTCAGTTCGGGTAGGGTGGG\n+>49969\n+TGAGTACAAGCCCTTCTGGCGTGAAGA\n+>49970\n+TTTTGGAGCCACTTTCGCCTCGTAGGA\n+>49971\n+TCTATCCAAAGAGCTGATTGTCATACT\n+>49972\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49973\n+TTTGTGCAAATCAATATAAATTGA\n+>49974\n+TCGAATTGCTGAATGCCGAAGTAAAATA\n+>49975\n+AGATATGTTTGATATTCTTGGTTG\n+>49976\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49977\n+TCCCCCCCTAAGATTATGACCGTC\n+>49978\n+TTGCAGACGGACTTTTGAATTCACAGG\n+>49979\n+TGCTTGGACTACATATGGTTGAGGG\n+>49980\n+TGAGGTAGTAGGTTGTATAG\n+>49981\n+TGACGATGCCGCTGTAGAGCTTG\n+>49982\n+TGTTGTGTTGCGACAGTAGAGCGC\n+>49983\n+TACATGGCGACTTTCTTGCAACTGAACT\n+>49984\n+TGCAGGAAGCTTTCAAGGCAACAGA\n+>49985\n+TTAGGACGAGATTCGCTAATGCAAT\n+>49986\n+TAACATATGTGCAAGTTATTGGGA\n+>49987\n+TGTGATGTGACGTAGTGGAA\n+>49988\n+AACTTCTTCTATCTTTGTGCGGGA\n+>49989\n+TATTTGGGTCACCGGGTTAAGTAGCGC\n+>49990\n+ATATTGTCTAGTGGTTAGG\n+>49991\n+TGAGGTAGTAGGTTGTATAGT\n+>49992\n+TCTTGGACTGAGCAGCTACTGTTTG\n+>49993\n+GCTCTCTTGAGTGGATTGCGCATGGA\n+>49994\n+TGATCTGGGGTGCATGGTAATCGG\n+>49995\n+AATGGCACTGGAAGAATTCAC\n+>49996\n+CGGGAAACTATGGATCAAATG\n+>49997\n+ATCTGCCTGAGTCGACTGTTCCGTAA\n+>49998\n+TTTGAGCAGCGAATCTGGAACGGT\n+>49999\n+TCGAAGACTAGACGGATTTTTCCCGGCT\n+>50000\n+TATTTAGAAAAACAGGTGAGTGA\n'
b
diff -r 008de522b3ea -r e11f91575af6 test-data/sample3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample3.fa Wed Mar 20 07:12:53 2019 -0400
b
b'@@ -0,0 +1,32172 @@\n+>33915\n+TGACTTGCTGCTTCTCGTCGAGGGA\n+>33916\n+TGGACAGAAACCACTCGACGGGTA\n+>33917\n+TGGACGGAGAACTGATAAGGGC\n+>33918\n+GAGGACAGCTCCGATGGCCCAGTTAGA\n+>33919\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>33920\n+TATTTACCCATTGAACAAACCTTA\n+>33921\n+GGGGATGTAGCTCAGTGGGAGAGCCT\n+>33922\n+TACTCTCCAGATCTGCGGGTCCAAG\n+>33923\n+AGGGATAACTGGCTTGTGGCGGC\n+>33924\n+TCGGATTGTAGGACTGGACCAGCATG\n+>33925\n+TCACTGGGCTTTGTTTATCTCA\n+>33926\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>33927\n+TTCCGCCCTAAGGGAGCGAGA\n+>33928\n+TTTGTTAAGCAACGGTCGAAGGATA\n+>33929\n+TGTGATGTGACGTAGTGGAAC\n+>33930\n+TCACTGGGCTTTGTTTATCTCA\n+>33931\n+TCTCGGTCGTTAAGTCAGATGTAGGC\n+>33932\n+TGTGATGTGACGTAGTGGAACA\n+>33933\n+TGAGATCATTTTGAAAGCTGATT\n+>33934\n+TCTTTGGTATTCTAGCTGTAGA\n+>33935\n+AAATATCAGCTGGTAATTCTGGG\n+>33936\n+TGACTAGATCCACACTCATTAAA\n+>33937\n+TGCGGTGTAAAACATATTAATGGAA\n+>33938\n+TGCAGAGATGTATGAACGAATGTAT\n+>33939\n+TGTAGGGCAGGGACTCATTAACATC\n+>33940\n+TGACTAGATTTCATGCTCGTCT\n+>33941\n+TCAAGGTTATGTGGAGCATCTGTAAC\n+>33942\n+TTCGTAATGTCCGTATTTAGTAGAGA\n+>33943\n+TCATAACTGCACGCTGGAACACGGA\n+>33944\n+TCTTGGACTGAGCAGCTACTGTT\n+>33945\n+TGCCTTAATGAATCTTTGTAAATGGA\n+>33946\n+TGTTTCAACTGCGGAAAAGCTGGA\n+>33947\n+GACGAACATTATCTGAACAATAAGTGT\n+>33948\n+CAAAGTTTCGGATTTTATTACTGCGTC\n+>33949\n+TGAACACAGCTGGTGGTATCC\n+>33950\n+AATACCCTGGACATCGAGTGC\n+>33951\n+TGACAGTAGTTGTGAACACAGTGCGT\n+>33952\n+TGTGATGTGACGTAGTGGAACA\n+>33953\n+TGAAGTTGGGCGCCTGTTATAGAGATC\n+>33954\n+TGTTGAAAGCGTTCCTTACGTCTAGA\n+>33955\n+TAAGGCGTAGTTAGAGGATGTTACT\n+>33956\n+GAACAGAATTCTTTGAAACGGG\n+>33957\n+ATTCTTGAACAGTACCTGCCTTAG\n+>33958\n+TTGAAATCTTGCTGACAATTGAATT\n+>33959\n+TCCCATATTGTCTAGTGGGGGA\n+>33960\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>33961\n+TTCAAACTTAGGAGCTAGCATGAAAAC\n+>33962\n+TAAGACTTTAGAAGTTTGTGTGTGCT\n+>33963\n+TGGACGGAGAACTGATAAGGGC\n+>33964\n+GCTTCAACGGAGGAGTCGGTCGTTGTG\n+>33965\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>33966\n+TGTACTCTGACTCCGAGGGAGGGTG\n+>33967\n+TGCTGGTTGTTCTGTATTTTTCGGGC\n+>33968\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>33969\n+TTTGGATCTGCCTGAGTCGACTGG\n+>33970\n+TCGGGAAATGACCAACCTAGGAGTTGT\n+>33971\n+TATGGGTGGTCAGCTTTTCGTTATACT\n+>33972\n+TTGATGACTTGCTGTTTGGCAACCA\n+>33973\n+TGGATGACTGGAAACGCTTCGTGGGA\n+>33974\n+TGTGATGTGACGTAGTGGAA\n+>33975\n+TCTTTGGTATTCTAGCTGTAGA\n+>33976\n+TCTTTGGTATTCTAGCTGTAGA\n+>33977\n+GGGGATGTAGCTCAGATGGTAG\n+>33978\n+TCCTCGATAGTATAGTGGTTAGTATC\n+>33979\n+CTAAGATGTGCATTCTGTTGGA\n+>33980\n+TAAGATAGACCATGCTGTGGAGC\n+>33981\n+TGTGATGTGACGTAGTGGAA\n+>33982\n+AAGATTCTGAGAACCATCACTGGGGC\n+>33983\n+TGTGATGTGACGTAGTGGAACA\n+>33984\n+TTCAACGAAACTTCGAAGTGTAGCGA\n+>33985\n+TGTGATGTGACGTAGTGGAA\n+>33986\n+TCACTGGGCTTTGTTTATCTCA\n+>33987\n+AAAAGATCCCGGAAAGTCGGCCAGCGA\n+>33988\n+TAGTGGACTGTATTTATTTGATATATG\n+>33989\n+ATTGCCACATCATAGGCTTCTTGGA\n+>33990\n+TGGATGACTGGAAACGCTTCGTGG\n+>33991\n+TGCTTGGACTACATATGGTTGAGGGTTGT\n+>33992\n+ATATTTAGAAAAACAGGTGAGTGA\n+>33993\n+TATGATTAGCAATGAAACATTTTTGA\n+>33994\n+AAATATCAGCTGGTAATTCTG\n+>33995\n+TACGTTGGGCTTAGATACATTTTGGA\n+>33996\n+GGGGATGTAGCTCAGATGGTAGAGCCT\n+>33997\n+TATGAGCCATTGCTGGTTGAGAAAGA\n+>33998\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>33999\n+CACAAACATGTTGAACCTGGCGGG\n+>34000\n+TGTTGAGTTGGTCGAGTAGGTTTGA\n+>34001\n+CTAAGGAAATAGTAGCCGTGAT\n+>34002\n+TATATACGTCTGCTCCAATGATTAGA\n+>34003\n+TTCGGGATATAGTGCCTTGTGAATA\n+>34004\n+TGGACCGTACTTTAGTATA\n+>34005\n+TGTTGTTTTATTTATGAGTCGAACT\n+>34006\n+TGTGATGTGACGTAGTGGAAC\n+>34007\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>34008\n+TATTATTAAGTACTCGTACATACGGA\n+>34009\n+GATCTTTAGAAAATCTGAGGCGTCGGG\n+>34010\n+TGGTTCGAAAATCGCGGTGATGAATC\n+>34011\n+TGTGATGTGACGTAGTGGAACA\n+>34012\n+TATAGTCATCATCCTCTGAAATGAAA\n+>34013\n+TTTGGTCCGAGGCAATCAATTTTA\n+>34014\n+TGGAAGACTAGTGATTTTGTTGT\n+>34015\n+TTCGTATCAATTATGTCATGTACAA\n+>34016\n+TGCTTGGACTACATATGGTTGAGGG\n+>34017\n+TGGACGGAGAACTGATAAGG\n+>34018\n+ATCGGTGGTTCAGTGGTAGAATGCT\n+>34019\n+GAACAACATCTGATCAGGCACCGGA\n+>34020\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>34021\n+TATCACAGCCATTTTGACGAGTT\n+>34022\n+AAGAGGCAGTAGTTATTCATTAATC\n+>34023\n+TTATGAATGTCGCTTCTGATCCGGA\n+>34024\n+TTAGATCTTTATTTCTTCGTTGTCGG\n+>34025\n+GGTTCCATGGTGTAATGG\n+>34026\n+TACCAACGTGGTGAGTCTGAAGATAAG\n+>34027\n+TCGATGGTCTGTTGAATCTCAGCGGA\n+>34028\n+TGACGAAAGCAAAGACAATAGAATA\n+>34029\n+ATCGGTGGTTCAGTGGTAGAATG'..b'CCACATGATTCGGCTT\n+>49886\n+ATACATTGTAGACGGTCTTACGGGA\n+>49887\n+TAAACTAATAGAGACAGGTAGAATC\n+>49888\n+TTGTTGCAATGTCTGACTGGGGTTCGT\n+>49889\n+TGCTTGGACTACACATGGTTGAGGGTTGTA\n+>49890\n+GGGTCAGGCGATGATGAATT\n+>49891\n+TGTGATGTGACGTAGTGGAACA\n+>49892\n+TTGGGATATTGTTGGAAATGATTTTT\n+>49893\n+TATTCGAGAATTTTGTGATTAGTGA\n+>49894\n+TTGTGGAATGTTCGTGTCGAA\n+>49895\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49896\n+TGGAAGACTAGTGATTTTGTTG\n+>49897\n+TGCGACGGCGGCGAACGTAGTA\n+>49898\n+TGTGATGTGACGTAGTGGAA\n+>49899\n+TGATGACGAGCGTTCTTTTAGA\n+>49900\n+TCAGGGTGACCACACATTTCAAGGA\n+>49901\n+TGGAATTTCGTTGTGTCGTCAGTGTGA\n+>49902\n+AAGGACCCGAGGGCTGCAACCTTTTC\n+>49903\n+TTAGATAACTGAAAGCAAGTACTGG\n+>49904\n+AAGAAGCCGTCGAGAGATATCGGA\n+>49905\n+TGTTATCGATCATTTTAGTTCGCTGA\n+>49906\n+TCCCATATTGTCTAGTGG\n+>49907\n+TACATGGTTGTCTTGTAGAGTTGACGC\n+>49908\n+TGAGCGGAGAACCAGAGTTGATGTG\n+>49909\n+TATAGGTCTGATTCTAAAATGGGTGA\n+>49910\n+GGACTGACTCGTGTAGTGTGCACT\n+>49911\n+TGTTTTTCGGCCTTATAAACGGGG\n+>49912\n+CCTAATAAGAATTGAGGGATCAGGA\n+>49913\n+TATTTGTGCTGCCTCCTCTGAAATCA\n+>49914\n+TGTTATGTTGCCAGTCTGAGTCGTCAG\n+>49915\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49916\n+CAATATCGTCAACATCCTCGAACG\n+>49917\n+TGTGATGTGACGTAGTGGAAC\n+>49918\n+TTTAAGCTGGTAGGTGTAGAAGCCT\n+>49919\n+TTCTTTCGGATTCTGAAGTAATTAAT\n+>49920\n+GAAGATGAAACTGTTCTGGACGGA\n+>49921\n+TCACTGGGCTTTGTTTATCTCA\n+>49922\n+TCAGGTACTTAGTGACTCTCAA\n+>49923\n+ATCGAAAAGATTCGCTGAAGTTGGGC\n+>49924\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49925\n+TAGATGTAAGAATAACTGTTTTGA\n+>49926\n+TAGCAACCAGGTCATCTTCAAACT\n+>49927\n+TTTATTGGAGAGGTTGATCCT\n+>49928\n+GAAGGGTTCGGGCTCAATTAGAGGGT\n+>49929\n+CATGGGTTCTGATGTGTTTTCACGA\n+>49930\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49931\n+TCCATTTTTGAACACAGTTTGATGGGA\n+>49932\n+TAACTGAAGTATCTGAGGATTGGATT\n+>49933\n+TTTGAACTGGTCTGTGTTGAATTCAACC\n+>49934\n+GACGTTAGAAATCCGTTGGTGGA\n+>49935\n+TGCTTGGACTACATATGGTTGAGGG\n+>49936\n+TGGACTCGTTAGGTATGGATGTTGC\n+>49937\n+TGAGTCCCACAATACTGTATATA\n+>49938\n+TAAGACTATAATTGATGGAATGAACT\n+>49939\n+TGTTATACTCAGATACAGACGGTTCGA\n+>49940\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49941\n+TCTGAGTTTATTTTTAAAAGGTGTT\n+>49942\n+TCAGGTACTTAGTGACTCTCA\n+>49943\n+TCAGGATTGCTGAGCTGTGCGGTA\n+>49944\n+GGGGATGTAGCTCAGTGGA\n+>49945\n+AATATCATCTATTCTCGGTAGTGGA\n+>49946\n+GGTTCCATGGTGTAATGG\n+>49947\n+TGCGAGGTCAATGGTTGTAAAGTA\n+>49948\n+TCTTTGTAGTCGGTTGGCTGTACAGGT\n+>49949\n+GGCACTTGAACTTGGCACTGGACGC\n+>49950\n+GGGGACGTAGCTCAGTGGGG\n+>49951\n+TGCATAAGATATTCCGCCTCGAGAAGA\n+>49952\n+TGGACGGAGAACTGATAAGGG\n+>49953\n+TATTAACCAGACTGCAAATATACT\n+>49954\n+AACAATTAGGACCACGGCGATGGT\n+>49955\n+TTGGTGTAATCTTGGATCGGAGAC\n+>49956\n+CTAACTGTGAAAGGGGAATTGACCGGC\n+>49957\n+TGTAAAAGGGTGTCTCACTGCGGC\n+>49958\n+TTCTTATGGATTCTGAGTGATGTG\n+>49959\n+TTACAAAGTTGTAGATTGGTCGGGG\n+>49960\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49961\n+TAAATTGATTTAGTTTGAATTTAGA\n+>49962\n+TCTTTGGTATTCTAGCTGTAG\n+>49963\n+TAATACTGTCAGGTAAAGATGTCA\n+>49964\n+TATGCCGTAAGCTTGTTGCGCGCGGA\n+>49965\n+ACTTTCGGACTGATTGTTATGGTGG\n+>49966\n+TAATTGAGTACAGTTGGACATAGC\n+>49967\n+GCGGAGGTTGCGGTGCTGTACACTG\n+>49968\n+TTTCAGTTCGGGTAGGGTGGG\n+>49969\n+TGAGTACAAGCCCTTCTGGCGTGAAGA\n+>49970\n+TTTTGGAGCCACTTTCGCCTCGTAGGA\n+>49971\n+TCTATCCAAAGAGCTGATTGTCATACT\n+>49972\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49973\n+TTTGTGCAAATCAATATAAATTGA\n+>49974\n+TCGAATTGCTGAATGCCGAAGTAAAATA\n+>49975\n+AGATATGTTTGATATTCTTGGTTG\n+>49976\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49977\n+TCCCCCCCTAAGATTATGACCGTC\n+>49978\n+TTGCAGACGGACTTTTGAATTCACAGG\n+>49979\n+TGCTTGGACTACATATGGTTGAGGG\n+>49980\n+TGAGGTAGTAGGTTGTATAG\n+>49981\n+TGACGATGCCGCTGTAGAGCTTG\n+>49982\n+TGTTGTGTTGCGACAGTAGAGCGC\n+>49983\n+TACATGGCGACTTTCTTGCAACTGAACT\n+>49984\n+TGCAGGAAGCTTTCAAGGCAACAGA\n+>49985\n+TTAGGACGAGATTCGCTAATGCAAT\n+>49986\n+TAACATATGTGCAAGTTATTGGGA\n+>49987\n+TGTGATGTGACGTAGTGGAA\n+>49988\n+AACTTCTTCTATCTTTGTGCGGGA\n+>49989\n+TATTTGGGTCACCGGGTTAAGTAGCGC\n+>49990\n+ATATTGTCTAGTGGTTAGG\n+>49991\n+TGAGGTAGTAGGTTGTATAGT\n+>49992\n+TCTTGGACTGAGCAGCTACTGTTTG\n+>49993\n+GCTCTCTTGAGTGGATTGCGCATGGA\n+>49994\n+TGATCTGGGGTGCATGGTAATCGG\n+>49995\n+AATGGCACTGGAAGAATTCAC\n+>49996\n+CGGGAAACTATGGATCAAATG\n+>49997\n+ATCTGCCTGAGTCGACTGTTCCGTAA\n+>49998\n+TTTGAGCAGCGAATCTGGAACGGT\n+>49999\n+TCGAAGACTAGACGGATTTTTCCCGGCT\n+>50000\n+TATTTAGAAAAACAGGTGAGTGA\n'
b
diff -r 008de522b3ea -r e11f91575af6 test-data/sample4.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample4.fa Wed Mar 20 07:12:53 2019 -0400
b
b'@@ -0,0 +1,27292 @@\n+>30592\n+AAAACGGTTTGTGTCTCTGCTGAGCTT\n+>30593\n+GGTGAATTTCCGATTTCAAGT\n+>30594\n+TGGACGGAGAACTGATAAGGGC\n+>30595\n+TGGCAGTGTGGTTAGCTGGTTG\n+>30596\n+TGGACTTCACATTTCTTAGGTGTG\n+>30597\n+TTAGATTAAGATTGATGGTATGAACC\n+>30598\n+TAAAATTGTTCCAATGTAAATTCGAA\n+>30599\n+TGTGATGTGACGTAGTGGAAC\n+>30600\n+GGGGATGTAGCTCAGATGGTAGAGCCT\n+>30601\n+TTTTTTTGCCGCCAGCATATCGCTTA\n+>30602\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30603\n+TTAGATCAGATTTGTGGAAAATT\n+>30604\n+TGTACGTTGGTCTTCTTTGAA\n+>30605\n+TATTGTACTGTGAAACTGATGGTT\n+>30606\n+GACTCTTGCCTCCAGCATCCACTCAGA\n+>30607\n+TCAGAATTTGAATGGTCAAGTCGGACT\n+>30608\n+TATTGACTGGCGACGCTTTTGAGG\n+>30609\n+TTCTGATGTCGGTCACATGCTTGGA\n+>30610\n+TTAATTAATAGTATAGATACCA\n+>30611\n+TCTTTGGTATTCTAGCTGTAGA\n+>30612\n+TGATTATTTTAGTTATTTCTTTAGGA\n+>30613\n+AATGGCACTGGAAGAATTCACGG\n+>30614\n+TGCTTGGACTACATATGGTTGAGGGT\n+>30615\n+TATTAGTCAGATACGCCAACAACATGA\n+>30616\n+TTATATGGTTTGTTTTGCGTTGTGGAA\n+>30617\n+TATTTGCACCTCTTCGATAAAACAGTA\n+>30618\n+TACATTGAACTGGATTGATACGTT\n+>30619\n+TTGTTATGTACTTTACTATTTTGGGA\n+>30620\n+TTAGACATGAATGCTATCGGACGTT\n+>30621\n+TGTGTAATTTAGTTTTTCCAGGAGCGG\n+>30622\n+TCTTTGGTATTCTAGCTGTAG\n+>30623\n+CAGATCAACAAAGTACGATGGGGA\n+>30624\n+TTATGGACAATGTTTTGGGTGACCTAG\n+>30625\n+TGTGATGTGACGTAGTGGAACA\n+>30626\n+TCTCGGTAGTGCCTTTAGGGTGGA\n+>30627\n+AAAGGTTGAGCCTATGTAGAACA\n+>30628\n+TGTGATGTGACGTAGTGGAAC\n+>30629\n+TTGAGCTGTACTACGCTAATATGGGC\n+>30630\n+TTTTGTGTTGTTTACTGTTGTTCGAG\n+>30631\n+TCGCGAATACTGTACAGCAGT\n+>30632\n+TAGTGCTTTCTATGTCGGAAATGGATT\n+>30633\n+TTATTGTGTTTGAATGTGTTTATGT\n+>30634\n+GGGCACGTACGGATACCTTGTCGGA\n+>30635\n+CCTCCTCGAGCTCTACCTGATGG\n+>30636\n+TCACTGGGCTTTGTTTATCTC\n+>30637\n+CATGTTACTGTCGAAAACCC\n+>30638\n+TTTCGGACTGCTTCTTAAGGAATTGAA\n+>30639\n+TGGACGGAGAACTGATAAGGGC\n+>30640\n+TTATACCTCATAGACTGGTCGCAAA\n+>30641\n+ATTAAGATATAATTTTGGGT\n+>30642\n+TTTTAAAAATATCGAATTGAAATATT\n+>30643\n+TACATGCAAGCGTCGACCATAATG\n+>30644\n+TTTCTTGGAATTCAGGTGGGA\n+>30645\n+TGTGATGTGACGTAGTGGAAC\n+>30646\n+TCGACGGACATTCCAGATAAGGGGGGC\n+>30647\n+TGCTTGGACTACATATGGTTGAGGGTTGT\n+>30648\n+TAGTGGTTTATCGGCAGTACGCCGGA\n+>30649\n+CTCAATGGTCTAGGGGTATGATTCT\n+>30650\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30651\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30652\n+TCTGCATCATCAGGTATACGTCGTC\n+>30653\n+TGAAACGATGTGGCCAAGGAATTCTG\n+>30654\n+TGCTTGGACTACATATGGTTGAGGGTTG\n+>30655\n+AAGGATTGGCTCTGAAGATTGAGAT\n+>30656\n+TTTTAAGTGATACAGAACTCGATGAAG\n+>30657\n+TGGACGGAGAACTGATAAGGGC\n+>30658\n+TGACTAGATTTCATGCTCGTCTA\n+>30659\n+TGTGATGTGACGTAGTGGAAC\n+>30660\n+TCTGGGTGTTCTTTGAGATTTGGAA\n+>30661\n+TCTTTGGTATTCTAGCTGTAG\n+>30662\n+TATTGCACTTGAGACGGCCTGAAAA\n+>30663\n+GGGGACGTAGCTCAGTGGTAGAGC\n+>30664\n+TAAGACTTTAGAAGTTTGTGTGTG\n+>30665\n+TGTGATGTGACGTAGTGGAAC\n+>30666\n+GGTTCTATGGTGTAATGG\n+>30667\n+TTATTGAGGCGACCAGACTGAGTCCT\n+>30668\n+TGTAAGATCTAATTTAGGGGA\n+>30669\n+TGCTGTGCATCCTGCAAGTAGTTGTA\n+>30670\n+TACATCTCTACACCCCCTCTCCA\n+>30671\n+TCTGAAGGTGTAGTATTTGGTCGA\n+>30672\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30673\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30674\n+TTCGGACTGATTGTTATGGTGGGCA\n+>30675\n+TGTGATGTGACGTAGTGGAAC\n+>30676\n+TGCGATCCGAGTTCAGATCT\n+>30677\n+TGTGCGTAGTCTGTGGAGATCTCC\n+>30678\n+TTAATTGTATTATTTGTTATTTTGGGT\n+>30679\n+TCCCATATTGTCTAGTGGTTAGGATA\n+>30680\n+TGTTGTCATTATCTCCTATTTCTGA\n+>30681\n+TATTGGGCCATCTCGCTGTATTGAA\n+>30682\n+TTCCTACGAATCGCTGTATGAACAGT\n+>30683\n+GCTAACTTTTTTCTTTTAATGG\n+>30684\n+AGCAAGTACTGGTCTCTTAAA\n+>30685\n+TGAGACTGAAATTTATATAGATGT\n+>30686\n+TGAACACAGCTGGTGGTATCC\n+>30687\n+TCTTTGGTATTCTAGCTGTAG\n+>30688\n+TCAGGTACTTAGTGACTCTCAC\n+>30689\n+TTCTTCGCAGAATGCTCTTAGGCT\n+>30690\n+TCAATCCAAGGTATTTCGGG\n+>30691\n+TGTTATGGTTGCGGAACTCCGGAGGTG\n+>30692\n+TGTGATGTGACGTAGTGGAAC\n+>30693\n+GCTGATGTCGTTGTTGTTGCTGCT\n+>30694\n+CTGCTTGGACTACATATGGTTGAGGG\n+>30695\n+AACGAACGAGACTCAAATATATTAAATA\n+>30696\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30697\n+GCTTCAGGAACCAGATCATTGTGGA\n+>30698\n+TTCGTTTTGTCGGTGTTTGGCTTAAGG\n+>30699\n+TGTCACGGTCGCCATGTAGTTAATA\n+>30700\n+ACAACAATTAATAAGATCAAGAAAT\n+>30701\n+GAGGAAAGTAGCAGGTGTTGAG\n+>30702\n+TGTGATGTGACGTAGTGGAAC\n+>30703\n+TTTAGAAAATTTTAAAGGTATGTGA\n+>30704\n+GAACGGCCTCTAGTGCAGATCTTGGT\n+>30705\n+TCAGGTACTTAGTGACTCTCAAA\n+>30706\n+GCTATGCTCTCAACTGCGCGCGCGGT\n+>30707\n+AGTTGATTGAAGATCT'..b'TG\n+>44122\n+TAATTAAAACAGCCTATGTGAGAGA\n+>44123\n+TTCAGTCGGTGGGCGAAGAGTTGAGT\n+>44124\n+TTTTGTAGTCGGGAATCTCCGGA\n+>44125\n+TGTGATGTGACGTAGTGGAA\n+>44126\n+TTGTAATGCTCTGCTCGTTGATCGG\n+>44127\n+GGGGATGTAGCTCAGATGGTAGAGCCT\n+>44128\n+ATGTGATGTGACGTAGTGGAAC\n+>44129\n+TTTCGAGATAATGCGGAAGCAAGCT\n+>44130\n+TATATTGGTTGAACTCTAGATAAC\n+>44131\n+TCCCATATTGTCTAGTGGTTAGGATA\n+>44132\n+CATACCTTGACTGGCAGTCCCGGTGA\n+>44133\n+TGTTTTCTAGACTGGTCAACTCCGA\n+>44134\n+TCCCATATTGTCTAGTGGTTAGGATA\n+>44135\n+CATCACAGTCTGAGTTCTTGC\n+>44136\n+GAATATGAGGCGATTGATTGCA\n+>44137\n+TACAAAGCCTATTGCTAACCTTCTTGA\n+>44138\n+TAGTAGACCTAGAGCATCCAAAAAAC\n+>44139\n+TGATAGTCTGTCATGCTTGAACTGGG\n+>44140\n+TGCTCGCTAGTAGTACCTTCGTACAGC\n+>44141\n+TGCAGTGGATGTAAGATTTTCAATT\n+>44142\n+TGTAAGCAGGCGAGTCAGGATCT\n+>44143\n+TCCAAGTCCATCTAGGTCCCGCAGA\n+>44144\n+TTAGCTGTTCGATTAACT\n+>44145\n+TGTTTGATGACCGAAACTTGGAAA\n+>44146\n+TCTTTGGTGATTTTAGCTGTATAAA\n+>44147\n+GGGGATGTAGCTCAGATGGTAGAGCCT\n+>44148\n+TGAACACAGCTGGTGGTAT\n+>44149\n+TAATGGACTTCGAAGTTGAAGTGGGC\n+>44150\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>44151\n+ACTGACTCGGCTGATGTTTCTCCGGA\n+>44152\n+TACAAAGCCATTTTATCCGGGCTGGAT\n+>44153\n+TGGACGGAGAACTGATAAGG\n+>44154\n+GAAATATGATGATCAAGTA\n+>44155\n+TATTTGCGGACTGACGTCGAT\n+>44156\n+CGGCGCTTCACAGGCGCTGGA\n+>44157\n+GCATCGGTGGTTCAGTGGTAGAATGCT\n+>44158\n+TCACTGGGCTTTGTTTATCTCA\n+>44159\n+GGTTCTATGGTGTAATGG\n+>44160\n+TGGACGGAGAACTGATAAGGGC\n+>44161\n+TGTGATGTGACATAGTGGAAC\n+>44162\n+TCATCTAGTAATCATTAACGTTATA\n+>44163\n+TCAGGTCTAGGCACGAGAAATATT\n+>44164\n+GTTGACAAATTTACAACTGGTTGGGA\n+>44165\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>44166\n+GCATCGGTGGTTCAGTGGTAGAATGC\n+>44167\n+TCCGTAAGTGACTGTGGTTAATCTA\n+>44168\n+TGTGATGTGACGTAGTGGAA\n+>44169\n+TAGTTTAATGACCGATTGCATAATA\n+>44170\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>44171\n+TTGACTATTATTTGGAGAACCACAGC\n+>44172\n+GAGAGTATTAATTCAACCTGATCGCCA\n+>44173\n+TCTTTGGTATTCTAGCTGTAG\n+>44174\n+GAACAGAATTCTTTGAAACGGGGT\n+>44175\n+CATCACAGTCTGAGTTCTTGCT\n+>44176\n+TGGCACGAAGGGTCCTTGTACTGGA\n+>44177\n+TCACTGGGCTTTGTTTATCTCA\n+>44178\n+TCTGACGCGTTTAATTCGGGAGCGAG\n+>44179\n+TGGACGGAGAACTGATAAGGGC\n+>44180\n+TTTGTCTATAGAATATCTTGGTGC\n+>44181\n+TGTGATGTGACGTAGTGGAAC\n+>44182\n+TGGTAGAGAATACTTGTCAAATGAGA\n+>44183\n+GAAGGGCGAGAACGGTAGTGA\n+>44184\n+AACAACAAATATTTCCACGTCTCTGGT\n+>44185\n+TGTGATGTGACGTAGTGGAAC\n+>44186\n+TCCCATATTGTCTAGTGGTTAGGATA\n+>44187\n+TTATGTAAGAATATTTGTCATTAGA\n+>44188\n+TGTGATGTGACGTAGTGGAA\n+>44189\n+TCTTTGGTGATTTTAGCTGTA\n+>44190\n+TAAGTGTGTAAGACAGATTGATAGCTC\n+>44191\n+TGTGATGTGACGTAGTGGAAC\n+>44192\n+TATCACAGCCAGCTTTGATGAGC\n+>44193\n+GGTAGTCGGCGCACAGTTCGTGCC\n+>44194\n+TGTAAAATTTATTTGTAGTGATTGGC\n+>44195\n+ATTAAATCTTTGGATACCA\n+>44196\n+TTGGCCTATGCCTTGAATGTTTATTTTA\n+>44197\n+TGTTAGTAAGCTTATCGGTCTATATGA\n+>44198\n+TACTTATCTTATAAGTTGGTCGCTGAAGG\n+>44199\n+TGCTTGGACTACATATGGTTGAGGGTTGT\n+>44200\n+TCTTTGGTATTCTAGCTGTAGA\n+>44201\n+TGAATTGAGGACACTTAAACAGTTGGT\n+>44202\n+TCTTGGATAGCTGCTCAACCCGTGG\n+>44203\n+TCAAGAACCTGATGAAAGCAGTGGT\n+>44204\n+TCACTGGGCTTTGTTTAT\n+>44205\n+TTTGGGTGCGAGAGGTCCCGGGT\n+>44206\n+TCTTTGGTGATTTTAGCTGTAT\n+>44207\n+TGTGATGTGACGTAGTGGAA\n+>44208\n+ATCGGTGGTTCAGTGGTAGAATGC\n+>44209\n+TTTGTATTATGTGTTTCTGTTAATA\n+>44210\n+TAGAATTTTCTGGAACGGGATGGTCC\n+>44211\n+TCGCAGCGGAACTCCAAGAA\n+>44212\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>44213\n+TATACGATGGAATCTGTTACGGAACA\n+>44214\n+CTTTAGTAGCAGGACCTTGAGATGGCT\n+>44215\n+TAAGACTTTAGAAGTTTGTGTGTG\n+>44216\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>44217\n+TACTAAGTTTAGTAAAGTCACTTGA\n+>44218\n+TACGAAGGGATCTTGGAGTGCAAGGA\n+>44219\n+CCGTATACTGCATTGCCTGAACCCT\n+>44220\n+TTGAGCTGTACTACGCTAATATGGG\n+>44221\n+TATCGCTGTATTAGTCGTCTCTTACGA\n+>44222\n+AGGGTTGTGGTTAAATATAACATTTG\n+>44223\n+TACTCAGTAGCGGAGACGTGGAAATA\n+>44224\n+TCCGGTTTTCGGGCACTGGTTGGAGAA\n+>44225\n+TAGTTTTTTGGCAACCGATCGGGGA\n+>44226\n+TCTTTGGTATTCTAGCTGTAG\n+>44227\n+TCAAATGCAAATTGGATTGAGGAGAA\n+>44228\n+TGGAGCTGATTCGGTAGAAGA\n+>44229\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>44230\n+TGCTTGGACTACATATGGTTGAGG\n+>44231\n+TGTGCTCGCTGTACTATGTCGATGTT\n+>44232\n+TTTTATTTGGCTGTCTGAGCATGGTG\n+>44233\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>44234\n+TGTCCTTAATATCTCGATGAAACACGA\n+>44235\n+TGTGATGTGACGTAGTGGAACA\n+>44236\n+TTGACCGATGCACAGCTGCAGAGC\n+>44237\n+CAACGACTATCCTTAGGTACTCCGGGA\n'
b
diff -r 008de522b3ea -r e11f91575af6 test-data/sample5.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample5.fa Wed Mar 20 07:12:53 2019 -0400
b
b'@@ -0,0 +1,35668 @@\n+>30592\n+AAAACGGTTTGTGTCTCTGCTGAGCTT\n+>30593\n+GGTGAATTTCCGATTTCAAGT\n+>30594\n+TGGACGGAGAACTGATAAGGGC\n+>30595\n+TGGCAGTGTGGTTAGCTGGTTG\n+>30596\n+TGGACTTCACATTTCTTAGGTGTG\n+>30597\n+TTAGATTAAGATTGATGGTATGAACC\n+>30598\n+TAAAATTGTTCCAATGTAAATTCGAA\n+>30599\n+TGTGATGTGACGTAGTGGAAC\n+>30600\n+GGGGATGTAGCTCAGATGGTAGAGCCT\n+>30601\n+TTTTTTTGCCGCCAGCATATCGCTTA\n+>30602\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30603\n+TTAGATCAGATTTGTGGAAAATT\n+>30604\n+TGTACGTTGGTCTTCTTTGAA\n+>30605\n+TATTGTACTGTGAAACTGATGGTT\n+>30606\n+GACTCTTGCCTCCAGCATCCACTCAGA\n+>30607\n+TCAGAATTTGAATGGTCAAGTCGGACT\n+>30608\n+TATTGACTGGCGACGCTTTTGAGG\n+>30609\n+TTCTGATGTCGGTCACATGCTTGGA\n+>30610\n+TTAATTAATAGTATAGATACCA\n+>30611\n+TCTTTGGTATTCTAGCTGTAGA\n+>30612\n+TGATTATTTTAGTTATTTCTTTAGGA\n+>30613\n+AATGGCACTGGAAGAATTCACGG\n+>30614\n+TGCTTGGACTACATATGGTTGAGGGT\n+>30615\n+TATTAGTCAGATACGCCAACAACATGA\n+>30616\n+TTATATGGTTTGTTTTGCGTTGTGGAA\n+>30617\n+TATTTGCACCTCTTCGATAAAACAGTA\n+>30618\n+TACATTGAACTGGATTGATACGTT\n+>30619\n+TTGTTATGTACTTTACTATTTTGGGA\n+>30620\n+TTAGACATGAATGCTATCGGACGTT\n+>30621\n+TGTGTAATTTAGTTTTTCCAGGAGCGG\n+>30622\n+TCTTTGGTATTCTAGCTGTAG\n+>30623\n+CAGATCAACAAAGTACGATGGGGA\n+>30624\n+TTATGGACAATGTTTTGGGTGACCTAG\n+>30625\n+TGTGATGTGACGTAGTGGAACA\n+>30626\n+TCTCGGTAGTGCCTTTAGGGTGGA\n+>30627\n+AAAGGTTGAGCCTATGTAGAACA\n+>30628\n+TGTGATGTGACGTAGTGGAAC\n+>30629\n+TTGAGCTGTACTACGCTAATATGGGC\n+>30630\n+TTTTGTGTTGTTTACTGTTGTTCGAG\n+>30631\n+TCGCGAATACTGTACAGCAGT\n+>30632\n+TAGTGCTTTCTATGTCGGAAATGGATT\n+>30633\n+TTATTGTGTTTGAATGTGTTTATGT\n+>30634\n+GGGCACGTACGGATACCTTGTCGGA\n+>30635\n+CCTCCTCGAGCTCTACCTGATGG\n+>30636\n+TCACTGGGCTTTGTTTATCTC\n+>30637\n+CATGTTACTGTCGAAAACCC\n+>30638\n+TTTCGGACTGCTTCTTAAGGAATTGAA\n+>30639\n+TGGACGGAGAACTGATAAGGGC\n+>30640\n+TTATACCTCATAGACTGGTCGCAAA\n+>30641\n+ATTAAGATATAATTTTGGGT\n+>30642\n+TTTTAAAAATATCGAATTGAAATATT\n+>30643\n+TACATGCAAGCGTCGACCATAATG\n+>30644\n+TTTCTTGGAATTCAGGTGGGA\n+>30645\n+TGTGATGTGACGTAGTGGAAC\n+>30646\n+TCGACGGACATTCCAGATAAGGGGGGC\n+>30647\n+TGCTTGGACTACATATGGTTGAGGGTTGT\n+>30648\n+TAGTGGTTTATCGGCAGTACGCCGGA\n+>30649\n+CTCAATGGTCTAGGGGTATGATTCT\n+>30650\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30651\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30652\n+TCTGCATCATCAGGTATACGTCGTC\n+>30653\n+TGAAACGATGTGGCCAAGGAATTCTG\n+>30654\n+TGCTTGGACTACATATGGTTGAGGGTTG\n+>30655\n+AAGGATTGGCTCTGAAGATTGAGAT\n+>30656\n+TTTTAAGTGATACAGAACTCGATGAAG\n+>30657\n+TGGACGGAGAACTGATAAGGGC\n+>30658\n+TGACTAGATTTCATGCTCGTCTA\n+>30659\n+TGTGATGTGACGTAGTGGAAC\n+>30660\n+TCTGGGTGTTCTTTGAGATTTGGAA\n+>30661\n+TCTTTGGTATTCTAGCTGTAG\n+>30662\n+TATTGCACTTGAGACGGCCTGAAAA\n+>30663\n+GGGGACGTAGCTCAGTGGTAGAGC\n+>30664\n+TAAGACTTTAGAAGTTTGTGTGTG\n+>30665\n+TGTGATGTGACGTAGTGGAAC\n+>30666\n+GGTTCTATGGTGTAATGG\n+>30667\n+TTATTGAGGCGACCAGACTGAGTCCT\n+>30668\n+TGTAAGATCTAATTTAGGGGA\n+>30669\n+TGCTGTGCATCCTGCAAGTAGTTGTA\n+>30670\n+TACATCTCTACACCCCCTCTCCA\n+>30671\n+TCTGAAGGTGTAGTATTTGGTCGA\n+>30672\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30673\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30674\n+TTCGGACTGATTGTTATGGTGGGCA\n+>30675\n+TGTGATGTGACGTAGTGGAAC\n+>30676\n+TGCGATCCGAGTTCAGATCT\n+>30677\n+TGTGCGTAGTCTGTGGAGATCTCC\n+>30678\n+TTAATTGTATTATTTGTTATTTTGGGT\n+>30679\n+TCCCATATTGTCTAGTGGTTAGGATA\n+>30680\n+TGTTGTCATTATCTCCTATTTCTGA\n+>30681\n+TATTGGGCCATCTCGCTGTATTGAA\n+>30682\n+TTCCTACGAATCGCTGTATGAACAGT\n+>30683\n+GCTAACTTTTTTCTTTTAATGG\n+>30684\n+AGCAAGTACTGGTCTCTTAAA\n+>30685\n+TGAGACTGAAATTTATATAGATGT\n+>30686\n+TGAACACAGCTGGTGGTATCC\n+>30687\n+TCTTTGGTATTCTAGCTGTAG\n+>30688\n+TCAGGTACTTAGTGACTCTCAC\n+>30689\n+TTCTTCGCAGAATGCTCTTAGGCT\n+>30690\n+TCAATCCAAGGTATTTCGGG\n+>30691\n+TGTTATGGTTGCGGAACTCCGGAGGTG\n+>30692\n+TGTGATGTGACGTAGTGGAAC\n+>30693\n+GCTGATGTCGTTGTTGTTGCTGCT\n+>30694\n+CTGCTTGGACTACATATGGTTGAGGG\n+>30695\n+AACGAACGAGACTCAAATATATTAAATA\n+>30696\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>30697\n+GCTTCAGGAACCAGATCATTGTGGA\n+>30698\n+TTCGTTTTGTCGGTGTTTGGCTTAAGG\n+>30699\n+TGTCACGGTCGCCATGTAGTTAATA\n+>30700\n+ACAACAATTAATAAGATCAAGAAAT\n+>30701\n+GAGGAAAGTAGCAGGTGTTGAG\n+>30702\n+TGTGATGTGACGTAGTGGAAC\n+>30703\n+TTTAGAAAATTTTAAAGGTATGTGA\n+>30704\n+GAACGGCCTCTAGTGCAGATCTTGGT\n+>30705\n+TCAGGTACTTAGTGACTCTCAAA\n+>30706\n+GCTATGCTCTCAACTGCGCGCGCGGT\n+>30707\n+AGTTGATTGAAGATCT'..b'CCACATGATTCGGCTT\n+>49886\n+ATACATTGTAGACGGTCTTACGGGA\n+>49887\n+TAAACTAATAGAGACAGGTAGAATC\n+>49888\n+TTGTTGCAATGTCTGACTGGGGTTCGT\n+>49889\n+TGCTTGGACTACACATGGTTGAGGGTTGTA\n+>49890\n+GGGTCAGGCGATGATGAATT\n+>49891\n+TGTGATGTGACGTAGTGGAACA\n+>49892\n+TTGGGATATTGTTGGAAATGATTTTT\n+>49893\n+TATTCGAGAATTTTGTGATTAGTGA\n+>49894\n+TTGTGGAATGTTCGTGTCGAA\n+>49895\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49896\n+TGGAAGACTAGTGATTTTGTTG\n+>49897\n+TGCGACGGCGGCGAACGTAGTA\n+>49898\n+TGTGATGTGACGTAGTGGAA\n+>49899\n+TGATGACGAGCGTTCTTTTAGA\n+>49900\n+TCAGGGTGACCACACATTTCAAGGA\n+>49901\n+TGGAATTTCGTTGTGTCGTCAGTGTGA\n+>49902\n+AAGGACCCGAGGGCTGCAACCTTTTC\n+>49903\n+TTAGATAACTGAAAGCAAGTACTGG\n+>49904\n+AAGAAGCCGTCGAGAGATATCGGA\n+>49905\n+TGTTATCGATCATTTTAGTTCGCTGA\n+>49906\n+TCCCATATTGTCTAGTGG\n+>49907\n+TACATGGTTGTCTTGTAGAGTTGACGC\n+>49908\n+TGAGCGGAGAACCAGAGTTGATGTG\n+>49909\n+TATAGGTCTGATTCTAAAATGGGTGA\n+>49910\n+GGACTGACTCGTGTAGTGTGCACT\n+>49911\n+TGTTTTTCGGCCTTATAAACGGGG\n+>49912\n+CCTAATAAGAATTGAGGGATCAGGA\n+>49913\n+TATTTGTGCTGCCTCCTCTGAAATCA\n+>49914\n+TGTTATGTTGCCAGTCTGAGTCGTCAG\n+>49915\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49916\n+CAATATCGTCAACATCCTCGAACG\n+>49917\n+TGTGATGTGACGTAGTGGAAC\n+>49918\n+TTTAAGCTGGTAGGTGTAGAAGCCT\n+>49919\n+TTCTTTCGGATTCTGAAGTAATTAAT\n+>49920\n+GAAGATGAAACTGTTCTGGACGGA\n+>49921\n+TCACTGGGCTTTGTTTATCTCA\n+>49922\n+TCAGGTACTTAGTGACTCTCAA\n+>49923\n+ATCGAAAAGATTCGCTGAAGTTGGGC\n+>49924\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49925\n+TAGATGTAAGAATAACTGTTTTGA\n+>49926\n+TAGCAACCAGGTCATCTTCAAACT\n+>49927\n+TTTATTGGAGAGGTTGATCCT\n+>49928\n+GAAGGGTTCGGGCTCAATTAGAGGGT\n+>49929\n+CATGGGTTCTGATGTGTTTTCACGA\n+>49930\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49931\n+TCCATTTTTGAACACAGTTTGATGGGA\n+>49932\n+TAACTGAAGTATCTGAGGATTGGATT\n+>49933\n+TTTGAACTGGTCTGTGTTGAATTCAACC\n+>49934\n+GACGTTAGAAATCCGTTGGTGGA\n+>49935\n+TGCTTGGACTACATATGGTTGAGGG\n+>49936\n+TGGACTCGTTAGGTATGGATGTTGC\n+>49937\n+TGAGTCCCACAATACTGTATATA\n+>49938\n+TAAGACTATAATTGATGGAATGAACT\n+>49939\n+TGTTATACTCAGATACAGACGGTTCGA\n+>49940\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49941\n+TCTGAGTTTATTTTTAAAAGGTGTT\n+>49942\n+TCAGGTACTTAGTGACTCTCA\n+>49943\n+TCAGGATTGCTGAGCTGTGCGGTA\n+>49944\n+GGGGATGTAGCTCAGTGGA\n+>49945\n+AATATCATCTATTCTCGGTAGTGGA\n+>49946\n+GGTTCCATGGTGTAATGG\n+>49947\n+TGCGAGGTCAATGGTTGTAAAGTA\n+>49948\n+TCTTTGTAGTCGGTTGGCTGTACAGGT\n+>49949\n+GGCACTTGAACTTGGCACTGGACGC\n+>49950\n+GGGGACGTAGCTCAGTGGGG\n+>49951\n+TGCATAAGATATTCCGCCTCGAGAAGA\n+>49952\n+TGGACGGAGAACTGATAAGGG\n+>49953\n+TATTAACCAGACTGCAAATATACT\n+>49954\n+AACAATTAGGACCACGGCGATGGT\n+>49955\n+TTGGTGTAATCTTGGATCGGAGAC\n+>49956\n+CTAACTGTGAAAGGGGAATTGACCGGC\n+>49957\n+TGTAAAAGGGTGTCTCACTGCGGC\n+>49958\n+TTCTTATGGATTCTGAGTGATGTG\n+>49959\n+TTACAAAGTTGTAGATTGGTCGGGG\n+>49960\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49961\n+TAAATTGATTTAGTTTGAATTTAGA\n+>49962\n+TCTTTGGTATTCTAGCTGTAG\n+>49963\n+TAATACTGTCAGGTAAAGATGTCA\n+>49964\n+TATGCCGTAAGCTTGTTGCGCGCGGA\n+>49965\n+ACTTTCGGACTGATTGTTATGGTGG\n+>49966\n+TAATTGAGTACAGTTGGACATAGC\n+>49967\n+GCGGAGGTTGCGGTGCTGTACACTG\n+>49968\n+TTTCAGTTCGGGTAGGGTGGG\n+>49969\n+TGAGTACAAGCCCTTCTGGCGTGAAGA\n+>49970\n+TTTTGGAGCCACTTTCGCCTCGTAGGA\n+>49971\n+TCTATCCAAAGAGCTGATTGTCATACT\n+>49972\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49973\n+TTTGTGCAAATCAATATAAATTGA\n+>49974\n+TCGAATTGCTGAATGCCGAAGTAAAATA\n+>49975\n+AGATATGTTTGATATTCTTGGTTG\n+>49976\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49977\n+TCCCCCCCTAAGATTATGACCGTC\n+>49978\n+TTGCAGACGGACTTTTGAATTCACAGG\n+>49979\n+TGCTTGGACTACATATGGTTGAGGG\n+>49980\n+TGAGGTAGTAGGTTGTATAG\n+>49981\n+TGACGATGCCGCTGTAGAGCTTG\n+>49982\n+TGTTGTGTTGCGACAGTAGAGCGC\n+>49983\n+TACATGGCGACTTTCTTGCAACTGAACT\n+>49984\n+TGCAGGAAGCTTTCAAGGCAACAGA\n+>49985\n+TTAGGACGAGATTCGCTAATGCAAT\n+>49986\n+TAACATATGTGCAAGTTATTGGGA\n+>49987\n+TGTGATGTGACGTAGTGGAA\n+>49988\n+AACTTCTTCTATCTTTGTGCGGGA\n+>49989\n+TATTTGGGTCACCGGGTTAAGTAGCGC\n+>49990\n+ATATTGTCTAGTGGTTAGG\n+>49991\n+TGAGGTAGTAGGTTGTATAGT\n+>49992\n+TCTTGGACTGAGCAGCTACTGTTTG\n+>49993\n+GCTCTCTTGAGTGGATTGCGCATGGA\n+>49994\n+TGATCTGGGGTGCATGGTAATCGG\n+>49995\n+AATGGCACTGGAAGAATTCAC\n+>49996\n+CGGGAAACTATGGATCAAATG\n+>49997\n+ATCTGCCTGAGTCGACTGTTCCGTAA\n+>49998\n+TTTGAGCAGCGAATCTGGAACGGT\n+>49999\n+TCGAAGACTAGACGGATTTTTCCCGGCT\n+>50000\n+TATTTAGAAAAACAGGTGAGTGA\n'
b
diff -r 008de522b3ea -r e11f91575af6 test-data/sample_output.pdf
b
Binary file test-data/sample_output.pdf has changed
b
diff -r 008de522b3ea -r e11f91575af6 test-data/sample_output.tab
--- a/test-data/sample_output.tab Sun Feb 10 18:31:51 2019 -0500
+++ b/test-data/sample_output.tab Wed Mar 20 07:12:53 2019 -0400
b
@@ -1,8 +1,3 @@
-2L-tail.fa 638
-
-dme_miR21_hairpin.fa 0
-
-Ensembl_transposon_set.fa 560
-
-Not classified 78
-
+sample.fastq dme_miR21_hairpin.fa 0 638
+sample.fastq Ensembl_transposon_set.fa 560 638
+sample.fastq Not classified 78 638