Repository 'filter_spades_repeats'
hg clone https://toolshed.g2.bx.psu.edu/repos/nml/filter_spades_repeats

Changeset 0:90957420cc07 (2017-10-12)
Next changeset 1:0e3d2c8b1b23 (2017-11-07)
Commit message:
planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
added:
filter_spades_repeats.pl
filter_spades_repeats.xml
test-data/Discarded_sequences.fasta
test-data/Filtered_sequences_(no_repeats).fasta
test-data/Filtered_sequences_(with_repeats).fasta
test-data/Repeat_sequences.fasta
test-data/Results_summary.txt
test-data/SPAdes_scaffold_stats.tabular
test-data/SPAdes_scaffolds_(fasta).fasta
b
diff -r 000000000000 -r 90957420cc07 filter_spades_repeats.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_spades_repeats.pl Thu Oct 12 15:04:45 2017 -0400
b
b'@@ -0,0 +1,313 @@\n+#!/usr/bin/env perl\n+\n+use strict;\n+use Getopt::Long;\n+use Bio::SeqIO;\n+use Pod::Usage;\n+\n+my ($fasta_file, $tab_file, $coverage_co, $length_co, $repeat_co, $out_filtered, $out_repeats, $out_norepeats,$coverage_length_co, $summary_out, $filtered_repeats, $help);\n+\n+GetOptions(\n+\t\'c|coverage-cutoff=s\'\t       => \\$coverage_co,\n+\t\'l|length-cutoff=s\'\t       => \\$length_co,\n+        \'e|coverage-length-cutoff=s\'   => \\$coverage_length_co,\n+\t\'r|repeat_cutoff=s\'\t       => \\$repeat_co, \n+\t\'i|input=s\'\t\t       => \\$fasta_file,\n+\t\'t|tab=s\'\t\t       => \\$tab_file,\n+\t\'f|filtered-out=s\'\t       => \\$out_filtered,\n+\t\'o|output-repeats=s\'\t       => \\$out_repeats,\n+\t\'u|output-norepeats=s\'\t       => \\$out_norepeats,\n+        \'n|filtered-repeats=s\'         => \\$filtered_repeats,\n+        \'s|summary=s\'                  => \\$summary_out,\n+\t\'h|help\'\t\t       => \\$help\n+);\n+\n+pod2usage(-verbose => 2) if ($help);\n+print "A fasta file is required. Please enter a fasta file using the -i flag.\\n" if (!$fasta_file);\n+print "A spades tabs file is required. 
Please enter a tabs file using the -t flag\\n" if (!$tab_file);\n+pod2usage(1) unless $fasta_file && $tab_file;\n+\n+if (!$coverage_co)\n+{\n+   $coverage_co = 0.33;\n+}\n+if (!$length_co)\n+{\n+   $length_co = 1000;\n+}\n+if (!$coverage_length_co)\n+{\n+   $coverage_length_co = 5000;\n+}\n+if (!$repeat_co)\n+{\n+   $repeat_co = 1.75;\n+}\n+if (!$out_filtered)\n+{\n+   $out_filtered = "Discarded_sequences.fasta";\n+   print "Discarded sequences will be printed out to $out_filtered\\n";\n+}\n+if (!$out_repeats)\n+{\n+   $out_repeats = "Filtered_sequences_with_repeats.fasta";\n+   print "Filtered sequences with repeats will be printed out to $out_repeats\\n";\n+}\n+if (!$out_norepeats)\n+{\n+   $out_norepeats = "Filtered_sequences_no_repeats.fasta";\n+   print "Filtered sequences without repeats will be printed out to $out_norepeats\\n";\n+}\n+if (!$filtered_repeats)\n+{\n+   $filtered_repeats = "Repeat_sequences.fasta";\n+   print "Repeat sequences will be printed out to $filtered_repeats\\n";\n+}\n+\n+die ("No tab file specified") unless ($tab_file);\n+die ("No fasta file specified") unless ($fasta_file);\n+\n+##Read tab file and discard rows with comments\n+open TAB, \'<\', $tab_file or die "Could not open tab file: $?";\n+open SEQIN, \'<\', $fasta_file or die "Could not open tab file: $?";\n+open SEQOUT_REP, \'>\', $out_repeats or die "Could not open file for writing: $?";\n+open SEQOUT_NOREP, \'>\', $out_norepeats or die "Could not open file for writing: $?";\n+open SEQOUT_FILT, \'>\', $out_filtered if ($out_filtered);\n+open SEQOUT_FILT_REP, \'>\', $filtered_repeats or die "Could not open file for writing: $?";\n+open SUMMARY, \'>\', $summary_out if ($summary_out);\n+\n+\n+my $avg_coverage = 0;\n+my $num_contigs = 0;\n+my $cutoff_coverage;\n+my $cutoff_repeats;\n+my @stats;\n+\n+\n+while (<TAB>)\n+{\n+\tchomp;\n+\tpush @stats, $_ unless (/^#/);\n+}\n+\n+#Calculate average coverage.\n+foreach my $stat(@stats)\n+{\n+\tmy ($length, $coverage);\n+\t(undef,$length, 
$coverage) = split(/\\t+/, $stat);\n+        die "length or coverage not defined at $stat\\n" unless ($length && ($coverage ne \'\' && $coverage >= 0));\n+\tif ($length >= $coverage_length_co)\n+\t{\n+\t\t$avg_coverage = $avg_coverage + $coverage;\n+\t\t$num_contigs++;\n+\t}\n+}\n+\n+$avg_coverage = $avg_coverage / $num_contigs;\n+$cutoff_coverage = $avg_coverage * $coverage_co;\n+$cutoff_repeats = $avg_coverage * $repeat_co;\n+\n+print SUMMARY "Filter SPAdes repeats Results Summary\\n======================================\\n\\n" if ($summary_out);\n+print SUMMARY "Paramaters used:\\nLength cutoff for calcularing average cutoff: $coverage_length_co\\nCoverage cutoff ratio: $coverage_co\\nRepeat cutoff ratio: $repeat_co\\nLength cutoff: $length_co\\n\\n" if ($summary_out);\n+\n+print SUMMARY "Calculations:\\nAverage coverage: $avg_coverage\\nCoverage cutoff: $cutoff_coverage\\nRepeat cutoff: $cutoff_repeats\\n\\nFile headers:\\n" if ($summary_out);\n+\n+my ($header, $seq_id, $seq); \n+my $repeated = 0;\n+my $valid = 0;\n+\n+#Summary strings:\n+my $discarded = "";\n+my $'..b'" copies)";\n+\t\t\t\tprint SEQOUT_REP $header,"\\n";\n+                                $filtered_rep = $filtered_rep.$header."\\n";\n+                                print SEQOUT_FILT_REP $header, "\\n";\n+                                $repeats = $repeats.$header."\\n";\n+\t\t\t\t$repeated = 1;\n+\t\t\t}\n+\t\t\telse\n+\t\t\t{\n+\t\t\t\tprint SEQOUT_REP $header, "\\n";\n+                                $filtered_rep = $filtered_rep.$header."\\n";\n+\t\t\t\tprint SEQOUT_NOREP $header, "\\n";\n+                                $filtered_norep = $filtered_norep.$header."\\n";\n+\t\t\t\t$repeated = 0;\n+\t\t\t}\n+\t\t}\n+\t\telsif ($out_filtered)\n+\t\t{\n+\t\t\t$valid = 0;\n+\t\t\tprint SEQOUT_FILT $header,"\\n";\n+                        $discarded = $discarded.$header."\\n";\n+\t\t}\n+\t}\n+\telse\n+\t{\n+\t\tif ($valid)\n+\t\t{\n+\t\t\tprint SEQOUT_REP $line;\n+\t\t\tif 
(!$repeated)\n+\t\t\t{\n+\t\t\t\tprint SEQOUT_NOREP $line;\n+\t\t\t}\n+                        else\n+                        {\n+                              print SEQOUT_FILT_REP $line;\n+                        }\n+\t\t}\n+\t\telsif ($out_filtered)\n+\t\t{\n+\t\t\tprint SEQOUT_FILT $line;\n+\t\t}\n+\t}\n+\t\n+}\n+\n+close TAB;\n+close SEQIN;\n+close SEQOUT_REP;\n+close SEQOUT_NOREP;\n+close SEQOUT_FILT;\n+close SEQOUT_FILT_REP;\n+\n+\n+#Get summary info:\n+if ($summary_out)\n+{\n+   print SUMMARY "Filtered sequences (with repeats):\\n$filtered_rep\\n";\n+   print SUMMARY "Filtered sequences (no repeats):\\n$filtered_norep\\n";\n+   print SUMMARY "Repeat sequences:\\n$repeats\\n";\n+   if ($out_filtered)\n+   {\n+      print SUMMARY "Discarded sequences:\\n$discarded\\n"; \n+   }\n+\n+   close SUMMARY;\n+}\n+\n+die "More rows in stats file than sequences in the fasta file\\n" if (scalar(@stats) > 0);\n+exit 0;\n+\n+\n+__END__\n+\n+\n+\n+=head1 NAME\n+\n+\tfilter_spades_repeats.pl - Filters contigs or scaffolds based on contig length and detects contigs/scaffolds with very high coverage.\n+\n+\n+\n+=head1 USAGE\n+\n+\tfilter_spades_output.pl -i <contigs/scaffolds input>\n+                                -t <stats input>\n+                                -o <output fasta with repeats>\n+                                -u <output fasta without repeats>\n+                                \n+                                Optional:\n+                                -c <coverage cutoff ratio> (default 0.33) \n+\t\t\t\t-l <length cutoff> (default: 1000)\n+                                -e <length cutoff for average coverage calculation> (default: 5000)\n+\t\t\t\t-r <repeat cutoff ratio> (default (1.75)\n+                                -n <filtered repeated sequences>\n+                                -f <discarded sequences>\n+                                -s <output summary file>\n+\n+                                For more information:\n+                         
       -h\n+\n+\n+=head1 INPUT\n+\n+=over 8\n+\n+=item B<-i>B<--input>\n+\n+Contigs/Scaffolds fasta file.\n+\n+=item B<-t>B<--tab>\n+\n+The tabular output file from SPAdes. This file should have the following format:\n+\n+      #name length   coverage\n+\n+      NODE_1   31438 24.5116\n+      \n+      NODE_2   31354 2316.96\n+\n+      NODE_3   26948 82.3294\n+\n+=item B<-o>B<--output-repeats>\n+\n+Output fasta file including the contigs marked as repeated.\n+\n+=item B<-u>B<--output-norepeats>\n+\n+Output fasta file excluding the contigs marked as repeated.\n+\n+=item B<-c>B<--coverage-cutoff>\n+\n+Mininum coverage ratio. \n+\n+\tcoverage_theshold = average_coverage * minimum_coverage_ratio.\n+\n+Any contigs/scaffolds with coverage below the coverage_theshold will be eliminated.\n+\n+=item B<-l>B<--length-cutoff>\n+\n+Mininum length. Contigs below this length will be eliminated.\n+\n+=item B<-e>B<--coverage-length-cutoff>\n+\n+Minimum length to use for average coverage calculations.\n+\n+=item B<-r>B<--repeat-cutoff>\n+\n+Minimum repeats ratio. \n+\n+\trepeat_threshold = average_coverage * repeat_ratio. \n+\n+Any contigs with coverage below this threshold will be considered to be repeated\n+\n+\n+=item B<-f>B<--filtered-out>\n+\n+If specified, filtered out sequences will be written to this file.\n+\n+=item B<-s>B<--summary>\n+\n+A summary of results\n+\n+=back\n+=cut\n'
b
diff -r 000000000000 -r 90957420cc07 filter_spades_repeats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_spades_repeats.xml Thu Oct 12 15:04:45 2017 -0400
[
@@ -0,0 +1,179 @@
+<tool id="filter_spades_repeat" name="Filter SPAdes repeats" version="1.0.0">
+ <description>Remove short and repeat contigs/scaffolds</description>
+ <requirements>
+ <requirement type="package" version="1.6.924">perl-bioperl</requirement>
+ </requirements>
+ <command detect_errors="exit_code"><![CDATA[
+
+ perl $__tool_directory__/filter_spades_repeats.pl 
+
+ -i '$fasta_input' 
+ -t '$tab_input' 
+ -c '$cov_cutoff' 
+ -r '$rep_cutoff' 
+ -l '$len_cutoff' 
+ -o '$output_with_repeats' 
+ -u '$output_without_repeats' 
+ -n '$repeat_sequences_only'
+ -e '$cov_len_cutoff' 
+ -f '$discarded_sequences' 
+ -s '$summary'
+
+ ]]></command>
+
+ <inputs>
+ <param name="fasta_input" type="data" format="fasta" label="Contigs or scaffolds file" help="Contigs/Scaffolds output file from Spades" />
+ <param name="tab_input" type="data" format="tabular" label="Stats file" help="Enter the corresponding stats file of the fasta file input above" />
+ <param name="cov_cutoff" type="float" value="0.33" min="0" label="Coverage cut-off ratio" help="This is the average coverage ratio cutoff. For example: if the average coverage is 100 and a coverage cut-off ratio of 0.5 is used, then any contigs with coverage lower than 50 will be eliminated." />
+ <param name="rep_cutoff" type="float" value="1.75" min="0" label="Repeat cut-off ratio" help="This is the coverage ratio cutoff to determine repeats in contigs. For example: if the average coverage is 100 and a repeat cut-off ratio of 1.75 is used, then any contigs with coverage more than or equal to 175 will be marked as repeats." />
+ <param name="len_cutoff" type="integer" value="1000" min="0" label="Length cut-off" help="Contigs with length under the chosen cut-off will be eliminated." />
+ <param name="cov_len_cutoff" type="integer" value="5000" min="0" label="Length for average coverage calculation" help="Only contigs at or above this length will be used to calculate the average coverage." />
+ <param name="keep_leftover" type="select" label="Print out a fasta file containing the discarded sequences?">
+ <option value="yes">Yes</option>
+ <option value="no">No</option>
+ </param>
+ <param name="print_summary" type="select" label="Print out a summary of all the results?">
+ <option value="yes">Yes</option>
+ <option value="no">No</option>
+ </param>
+ </inputs>
+ <outputs>
+ <data format="fasta" name="output_with_repeats" label="Filtered sequences (with repeats)" />
+ <data format="fasta" name="output_without_repeats" label="Filtered sequences (no repeats)" />
+ <data format="fasta" name="repeat_sequences_only" label="Repeat sequences" />
+ <data format="fasta" name="discarded_sequences" label="Discarded sequences">
+ <filter>keep_leftover == "yes"</filter>
+ </data>
+ <data format="txt" name="summary" label="Results summary">
+ <filter>print_summary == "yes"</filter>
+ </data>
+ </outputs>
+  <tests>
+        <test>
+            <param name="fasta_input" value="SPAdes_scaffolds_(fasta).fasta"/>
+            <param name="tab_input" value="SPAdes_scaffold_stats.tabular"/>
+            <output name="output_with_repeats" value="Filtered_sequences_(with_repeats).fasta"/>
+            <output name="output_without_repeats" value="Filtered_sequences_(no_repeats).fasta"/>
+            <output name="repeat_sequences_only" value="Repeat_sequences.fasta"/>
+            <output name="discarded_sequences" value="Discarded_sequences.fasta"/>
+            <output name="summary" value="Results_summary.txt"/>
+        </test>
+    </tests>
+ <help><![CDATA[
+********************
+  What does it do?
+********************
+Using the output of SPAdes (a fasta and a stats file, either from contigs or scaffolds), it filters the fasta files, discarding all sequences that are under a given length or under a calculated coverage. Repeated contigs are detected based on coverage.
+
+**********
+  Output
+**********
+
+ - **Filtered sequences (with repeats)** 
+ - Will contain the filtered contigs/scaffolds including the repeats. These are the sequences that passed the length and minimum coverage cutoffs.
+ - For workflows, this output is named **output_with_repeats**
+ - **Filtered sequences (no repeats)**   
+ -  Will contain the filtered contigs/scaffolds excluding the repeats. These are the sequences that passed the length, minimum coverage and repeat cutoffs.
+ - For workflows, this output is named **output_without_repeats**
+ - **Repeat sequences**                  
+ - Will contain the repeated contigs/scaffolds only. These are the sequences that were excluded for having high coverage (determined by the repeat cutoff).
+ - For workflows, this output is named **repeat_sequences_only**
+ - **Discarded sequences**               
+ - If selected, will contain the discarded sequences. These are the sequences that fell below the length or minimum coverage cutoffs, and got discarded.
+ - For workflows, this output is named **discarded_sequences**
+ - **Results summary**  : If selected, will contain a summary of all the results.
+
+************
+  Example
+************
+
+Stats file input:
+------------------
+
++------------+------------+------------+
+|#name       |length      |coverage    |
++============+============+============+
+|NODE_1      |2500        |15.5        |
++------------+------------+------------+
+|NODE_2      |102         |3.0         |
++------------+------------+------------+
+|NODE_3      |1300        |50.0        |
++------------+------------+------------+
+|NODE_4      |1000        |2.3         |
++------------+------------+------------+
+|NODE_5      |5000        |14.3        |
++------------+------------+------------+
+|NODE_6      |450         |25.2        |
++------------+------------+------------+
+
+User Inputs:
+------------
+
+- Coverage cut-off ratio = 0.33 
+- Repeat cut-off ratio = 1.75  
+- Length cut-off = 500
+- Length for average coverage calculation = 1000
+
+Calculations:
+-------------
+
+**Average coverage will be calculated based on contigs with length >= 1000bp**
+
+
+- Average coverage = (15.5 + 50.0 + 2.3 + 14.3) / 4 = 20.5
+
+**Contigs that have coverage in the lower 1/3 of the average coverage will be eliminated.**
+
+- Coverage cut-off = 20.5 * 0.33 = 6.8
+
+**Contigs with high coverage (larger than 1.75 times the average coverage) are considered to be repeated contigs.**
+
+- Repeat cut-off = 20.5 * 1.75 = 35.9
+
+**Number of copies are calculated by dividing the sequence coverage by the average coverage.**
+
+- Number of repeats for NODE_3  = 50 / 20.5 = 2 copies
+
+
+Output (in fasta format):
+--------------------------
+
+**Filtered sequences (with repeats)**
+
+::
+
+>NODE_1
+>NODE_3 (2 copies)
+>NODE_5
+
+**Filtered sequences (no repeats)**
+
+::
+
+>NODE_1
+>NODE_5
+
+**Repeat sequences**
+
+::
+
+>NODE_3 (2 copies)
+
+**Discarded sequences**
+
+::
+
+>NODE_2
+>NODE_4
+>NODE_6
+
+ ]]></help>
+    <citations>
+        <citation type="bibtex">@ARTICLE{a1,
+            title = {Filter SPAdes repeats Remove short and repeat contigs/scaffolds},
+            author = {Mariam Iskander},
+            url = {https://github.com/phac-nml/galaxy_tools/}
+            }
+        </citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 90957420cc07 test-data/Discarded_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Discarded_sequences.fasta Thu Oct 12 15:04:45 2017 -0400
b
b'@@ -0,0 +1,1034 @@\n+>SRR6126859_31 length_7815_cov_9.7826\n+ATATAGAATATCCAATAATCAAATTATATCATATTACGAATTAGGTTTCCCAAAAGATGC\n+CGTTTCAGAATTGATATTAGGTCCAAACAATAAATTTAAAGAAAGCGATATTGTTAACTT\n+TTTGCAGTATAATGGTTTTGAACACAGCATAAAAATATTGAAATCAAAGGCCAGTTATGG\n+AGCTTAAATAAAAATGTATTTTCTATTTTGCTATTCATTATAATTATTATTACGATATGC\n+CAGTGGTCTCAACTTTGAAACATGGGACTTTCCTGAATACTCTGAGGGGCTTGATGGTAT\n+GAGATCTATGTATTTATCAATCTCTGTTTGTACATCTTTCGGACGACTTGGAAAACGAAC\n+GGTTTTGTCATGGTCTGATTCATTGCCATAGATGCTTGCTTCTGGTGATGTCTGGTCATT\n+GTTAAATTTAGCGCGTTTTTCAAACTCCGAGCGTGGAACATTGATACCCAGCACTTCATC\n+ATAGACATAGTTCTCTGCGATACGCTTATCATGGTCAGCATCAGTGGTATCGGGTTTCCT\n+TCTGGCCCATATAAATTTCTCTCTTATATATCCTACTATCAGTAGCGTGAAACGTTTAAA\n+CAAACCATTCTGGCGGGGCTTGGTCTGTTTCCTACTCCTCATCGGAGCGCGGTTCTTCGC\n+TCTGGTGTTTTGAATAGATGGGGTTTTCAATTTAGTGGATGCAGTTGGTTCAGGAACAAT\n+CAGAACGGGAGATTTCTTCCCTGATACATTTACTTTACTGACCTTATTGGGATTACGGTA\n+AGATTTTGTTGCTGGTCTTCTGCTTGTGGCGTGTGTCGTCATAACAGGTTGCTGTTTAGT\n+TGTAGCCGGGAAGTCAGGGATAACGATTTCTTCGGGTTCAGCAAGATGTGTAATCGTAAA\n+GCTTCTGACATCAACCACGATCTCCAGTGGTAGCTCACTGAATGTGTTGTAGACCTTCTT\n+CGCTTCCTCTATTTGCTGATCACGTAAAGCTTGTTCTGTAGCTCGCTGTTCCTGTGACTC\n+GGTGTTATTCCACTTGGCACGATGAACTCGTTGCATTGCTGGACGGTTTGTTTCGGTTGC\n+TTTGGCAAGCCAGAAGGCTTTTTCTTCAGCGCTGAATGCTTCTTCTGCTTGGGCAAGGGC\n+ATCAGCACATTGAGTTCTGAGAGAACGGTGATCGATACGTTCTTCACGGCCTGCGGTCTC\n+AAGGTAGCTGTTTGACATCGTGGCCCACGACTCACGCCACTGAACAACCATCTTCTTATC\n+ATTCCAGCTTCTGTCTTTTTTGCCGAAGCCTTCTGGACCAATGGTTTTCAGGGTTAACAT\n+GACGTGAGCATGTGGGTTTTTACTGTCCAGATCATGAAATGCGATATCAGCAATCATCCC\n+CTTATCGACAAAGTTTTTCTGGCAGTATTCGGCAACTAATTTTTTCTTGTCATCATTGCT\n+GAGTTCTACGGGGATAGCGACATCAAAATAACGCGCTGTTTGTCCGTTGTTCTGGCGTTC\n+CACTCTTTCAACTTCATTCCACAATGCTGAGGAGCTTTCAACAATATGAGCAGGTGCGGA\n+AACAGGTGCCAATATAATATGACCATGTAGATCTGTTCTGTGGCTGAAATCGTATGTTTC\n+ACCGATGCGATCATCTGTAATACGTGTACGGGTATGGTAGGCAGCCTTCGCAACGGAAGT\n+CATGCCTTCGCTTCGTTTCACAATTTTAAAATCCAGATGAAAAATCGCCATTAATAAATT\n+AACCTCAAATATAAATGCTGCCGTTAGTTTTACGTAAGAAAAACTTCGGGGTTGACGTTG\n+GTTTTGCTTTTAGCCTGCGGCAGGCAACCGTCGGTAGACCCCACACA
CTGCGTAGCAGTG\n+TATAGGTGGGCATTGTCTTTAAAAAATAAGCCAATGTAAAAACTCCCCCTTAGTTAATTA\n+ATACGAAATATACTGATAGTTGTAAAATGATGCGTTAAATTAAACGACACGAACTGGATT\n+AAAGGTGTAGGTTTCACGTCCGAAGGGAAAAAATAAAGAAAAGGCCCGTAATGGCAAAAG\n+AAGTATTAATTGTTCCCGCCGAAAGGCGGGATAGCTAAAATTGAAGAAAAAAGCTAATAA\n+AACGGAGAGTGTTATGTGTTGCACAATTCACAGGTTGAGGTTGAAAAATAGAGTTTAAAG\n+CTGATAGGTGAAAATAAAATAACAAAAGTACAAAAATAAGTAACGCGGAGAGTGCCAGAT\n+GTTTTGTGTTAGTGATTACTATCGCATGTATTGCACTATTTTATTTATGGATGCTCCAGA\n+ACTATTCCTTCTGACAAAACGGTGTAAATTTGACGTTATTTACCTTTGTGATAATTAAAT\n+ACGTATAAACAATAAGAGGTAATAATCATGGATAATGAAACTAAAAGATCAAGAACAGAA\n+AAAACATTAAAACAAAAGGTGGCATTTGCTCAACTTGAACTTAACCGTTTAAAGTCAATG\n+GAGAAATCAGAACAAAAGAAAGTTGAAACAAGGCTTAAGATTATTCTTGGGGCAGAAGTA\n+GCCAAGGCTATGAATTGTGGTATCGAACAGGTGGATAAGGAACTTGTTATGGGGATTTTA\n+CTTTCAGCATCTGAGTTGAATGATATTGAAAGAGTCAAATATATAAAAGCAGGGAGATGG\n+TTTCTTGCTCAAATGGATGGCAGACAAAAATAAAAAATATTAAATAAATTTCATGACAGG\n+GGTTGATATCTAATTAATATCTGTTATCTATATAAAAGAACAGAGCGCAGGATAATACTT\n+TATTAATACATATTTTTTCCTTTTCAATCAATCATGCGTTCTGTTCGACCAGTAATCTGT\n+TAATTTTTATCTTTATGTAGTAAACGTCATTATTACTTTTTCATCGGATATGATTATATA\n+TCTCATGGGAAGGTGATAATGACAAACGCTGTGATAATAAATGTTGTATTTAAATCTCCC\n+AAAAGGCAAAGAGCGATCTTTGCCTTTTTCAATTCATCCGTCTATCAGATAGAGTTAAAT\n+ATATCTTACGCCATCTATTGCCAGAAATGAGAGAAGCGTTATAACAGGAGATATCATTAA\n+AATAAATAAAGGAGATAATAATGGTTTTATTAAATAGTAAAAGAAAAAGCAAAAAAGGTT\n+TCTCATTACTCGAATTATTACTTGTTCTGGGGATTATTGCCGCATTAGTAGTAGCGGCTT\n+TTATTGTGTATCCCAAGGTTCAGGCTTCACAAAGAGCGCAAGCGGAAAGTAATAATATTG\n+CCACCATACAGGCAGGTGTCAAAGCACTCTATACATCAGCATCCAGCTTTACTGGGCTAA\n+CGAATACCGTAGCTGTTCAGGCAAAAATCTTCCCTGATAATATGTTAAGTGGAACAGGCA\n+ATGCAGCTAAACCAATTAACGCATTCAAAGGGAACGTGACGCTGGCTGCGGCAGCTACAG\n+GTCCGTCATCTGCGGCAGGTTCTTCTTTCACTATCACATATGACAACGTGCCAGCAGCAG\n+AATGTGTGAAAATAACCACAGCGGCGGCAGGCAATTTTTACACTGCTAAAGTGGGTTCTA\n+AAGTAGTAAAAGCTGCAGATGGTACCCTTGATGTAGCAGCTACCGCCGCAGCATGTAATA\n+ACGCTACAAGTAATACATTAGTATTCACGTCTATTTAATTAAAGGGGCACTTTATGCCCC\n+TTTTTGGATCATATTTTCAGATGGGAGATATAATTATGATAAAGAAAAGAGGATTTACTC\n+TATTAGAGATCACCATCGTTTTAGGTATAGG
TTCTCTTATTGGTTTTATGAAGTTTCAGG\n+ACATGAGGAAAGAACAGGAAGCAGTAATGGCACA'..b'TGAAGGATAATGTTGCTTTAGCAACGGCCCGAAGGGCG\n+AGGCAAAGCCGAGTCATCCTGCACGACCCACCATCCTGAATGATTGAAGCAGTAACCCTT\n+ATCCAAGGGGTCGTTAGCTCAGTTGGTAGAGCAGTTGACTTTTAATCAATTGGTCGCAGG\n+TT\n+>SRR6126859_141 length_241_cov_84.7527\n+GCAGTTGACTTTTAATCAATTGGTCGCAGGTTCGAATCCTGCACGACCCACCAATTTAAC\n+ATCAAACTCAGATGTTGAACGTGAAGGATAACGTTGCGTCAGCAACCTGTAGGGCGAGGC\n+GAAGCCGAGTCATCCTGCACGACCCACCACTAACTTCGGTTAGTCAGTAATATCCAGCGT\n+AGTATCGGGTGATTAGCTCAGCTGGGAGAGCACCTCCCTTACAAGGAGGGGGTCGGCGGT\n+T\n+>SRR6126859_142 length_232_cov_26.5876\n+GATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCAAATTTAGTG\n+TGCTGATATGGCTCAGTTGGTAGAGCGCACCCTTGGTAAGGGTGAGGTCCCCAGTTCGAC\n+TCTGGGTATCAGCACCACTTAATACGGTTAAAGTTCGGTATTTGAAAAAGAATTTGTCTG\n+GCGGCAGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAA\n+>SRR6126859_143 length_225_cov_41.1235\n+CCTTTGACGCCTGTATCCGCTTTCTCGGCGAAGACCCGTGGCTGCGCCTGCGCGAGCTTA\n+AAAAGGCCATGCCGAAGACTCCCCTGCAGATGCTGCTGCGCGGCCAGAACCTGCTCGGCT\n+ACCGCCATTACGCCGATGATGTGGTGGAACGCTTCGTTGAGCGGGCGGTGAAAAACGGCA\n+TGGACGTGTTCCGCGTCTTCGATGCCATGAACGACCCGCGCAATA\n+>SRR6126859_144 length_170_cov_188.53\n+TATCCCATTAGGGCTATTTTACTTGCCATTTTGGACCTGGGCAGTGCTCGCCAAAACGCG\n+TTAGCGTTTTGAACGCCGCTTGCGGCGGCCCGAAGGGCGAGCGTAGCGAGTCAAACCTCA\n+CGTACTACGTGTACGCTCCGGTTTTTGCGCGCTGTCCGTGTCCAAACTGG\n+>SRR6126859_145 length_123_cov_63.4853\n+TCCTGAAATTCAGGGTTGACTCTGAAAGAGGAAAGCGTAATATACGCCACCTCGCGACGG\n+TGAGCTGAAAGCCGCGTCGCACCTGCTCTTTAACAATTTATCAGACAATCTGTGTGGGCA\n+CTC\n+>SRR6126859_146 length_117_cov_29.2742\n+CGGTAGCGCCGCAGCCGCAGTATCAGCAGCCGCAGCAGCCGGTAGCGCCGCAGCCGCAGT\n+ATCAGCAGCCGCAACAGCCGGTAGCGCCGCAGCCGCAGTATCAGCAGCCGCAGCAGC\n+>SRR6126859_147 length_112_cov_2.45614\n+CGTGAGGAGCAAATGAGATGAAAACACTCTGCAGTGAGTCCCTGCCTGAGGGGAGGCCGT\n+GAGGAGCAAATGAGATGAAAACACTCTGCAGTGAGTCCCTGCCTGAGGGGAG\n+>SRR6126859_148 length_100_cov_163.6\n+TTGTTCGTGAGTCTCTCAAATTTTCGCAACACGATGATGAATCGTAAGAAACATCTTCGG\n+GTTGTGAGGTTAAGCGACTAAGCGTACACGGTGGATGCCC\n+>SRR6126859_149 
length_99_cov_124.886\n+CGATTCATCATCGTGTTGCGAAAATTTGAGAGACTCACGAACAACTTTCGTTGTTCTGTG\n+TTTCAATTTTCAGCTTGATCCAGATTTTTAAAGAGCAAA\n+>SRR6126859_150 length_88_cov_1.72727\n+GATATGACCCCGCCCGACGATAGCGGCCCCGACGATAGCGGCGATGACGATGTGACCCCG\n+CCCGACGATAGCGGCGATGACGATGTGA\n+>SRR6126859_151 length_87_cov_112.25\n+TGGTGCGAGGGGGGGGACTTGAACCCCCACGTCCGTAAGGACACTAACACCTGAAGCTAG\n+CGCGTCTACCAATTCCGCCACCTTCGC\n+>SRR6126859_152 length_80_cov_191\n+ATGGTGCATCCGGGAGGATTCGAACCTCCGACCGCTCGGTTCGTAGCCGAGTACTCTATC\n+CAGCTGAGCTACGGATGCAT\n+>SRR6126859_153 length_79_cov_61.4583\n+ATGGTGGCTACGACGGGATTCGAACCTGTGACCCCATCATTATGAGTGATGTGCTCTAAC\n+CAACTGAGCTACGTAGCCA\n+>SRR6126859_154 length_79_cov_146.917\n+GGGTGATTAGCTCAGCTGGGAGAGCACCTCCCTTACAAGGAGGGGGTCGGCGGTTCGATC\n+CCGTCATCACCCACCACTT\n+>SRR6126859_155 length_78_cov_233.957\n+GGGTCGTTAGCTCAGTTGGTAGAGCAGTTGACTTTTAATCAATTGGTCGCAGGTTCGAAT\n+CCTGCACGACCCACCAAT\n+>SRR6126859_156 length_77_cov_146.636\n+TGGAGCGGGAAACGAGACTCGAACTCGCGACCCCGACCTTGGCAAGGTCGTGCTCTACCA\n+ACTGAGCTATTCCCGCA\n+>SRR6126859_157 length_77_cov_101.818\n+TGGTTGCGGGGGCCGGATTTGAACCGACGACCTTCGGGTTATGAGCCCGACGAGCTACCA\n+GGCTGCTCCACCCCGCG\n+>SRR6126859_158 length_71_cov_47.75\n+GGCTGCGGCGCTACCGGCTGCTGCGGCTGCTGATACTGCGGCTGCGGCGCTACCGGCTGT\n+TGCGGCTGCTG\n+>SRR6126859_159 length_68_cov_268.077\n+CCTCACGTACTACGTGTACGCTCCGGTTTTTGCGCGCTGTCCGTGTCCAAACTGGCTGCG\n+CCAATAAC\n+>SRR6126859_160 length_66_cov_48.3636\n+GATGTTTCTTACGATTCATCATCGTGTTGCGAAAATTTGAGAGACTCACGAACAACTTTC\n+GTTGTT\n+>SRR6126859_161 length_66_cov_93.9091\n+AACAACGAAAGTTGTTCGTGAGTCTCTCAAATTTTCGCAACACGATGATGAATCGAAAGA\n+AACATC\n+>SRR6126859_162 length_66_cov_118.909\n+GATGTTTCTTACGATTCATCATCGTGTTGCGAAAATTTGAGAGACTCACGAACAATTTTC\n+GTTGTT\n+>SRR6126859_163 length_59_cov_247.5\n+AGGCGTTATTGGCGCAGCCAGTTTGGACACGGACAGCGCGCAAAAACCGGAGCGTACAC\n+>SRR6126859_164 length_56_cov_222\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+>SRR6126859_165 
length_56_cov_35\n+ATTTGCTCTTTAAAAATCTGGATCAAGCTGAAAATTGAAACACAGAACAACGAAAG\n+>SRR6126859_166 length_56_cov_194\n+TGGGTCGTTAGCTCAGTTGGTAGAGCAGTTGACTTTTAATCAATTGGTCGCAGGTT\n+>SRR6126859_167 length_56_cov_801\n+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\n'
b
diff -r 000000000000 -r 90957420cc07 test-data/Filtered_sequences_(no_repeats).fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Filtered_sequences_(no_repeats).fasta Thu Oct 12 15:04:45 2017 -0400
b
b'@@ -0,0 +1,77751 @@\n+>SRR6126859_1 length_852517_cov_27.8446\n+GAGCACGACGTTGCCAAGGTCGGGGTCGCGAGTTCGAGTCTCGTTTCCCGCTCCAGTTTA\n+AAAGCATTGGCGTAAAGCGGATGTTGGCTGAAAAGCCAAAAAATTTGGCGCGTTAACAAA\n+GCGGTTATGTAGCGGATTGCAAATCCGTCTAGTCCGGTTCGACTCCGGAACGCGCCTCCA\n+ATTTTTCCCGAGCCCGGATGGTGGAATCGGTAGACACAAGGGATTTAAAATCCCTCGGCG\n+TTCGCGCTGTGCGGGTTCAAGTCCCGCTCCGGGTACCATGGGAAAGACAAGAATAATCAA\n+AGCAATAAGCAGTGTCGTGAAACCACCTACGGGTGGTTTTTTTGTGCCTTCAATTTGTAT\n+GCCGACCGCGAGCGTGAACTCTCCAGTGCGATACACAGTAAAAAACACCGGTAGCGGCAT\n+TTCTGCCAGTACAACAGTTATGTGCTACCGACCCGCAAGAACTTATGCCCGGTAAATACT\n+AAACGTCGGGTAAATCAGCCGATAAAATCCATTAATCTGTATAAAAATGCTGATATGTTT\n+TATGCCTGCTTTATTAAAGAGAGTAGGTCTGCCAGTGAAATATACCGATAATGCAAAATT\n+ATTTATTAGCTTCATTAATATTGGGATTGTAGGTTGCGCTTTTTTACTTTTTTTATTTTG\n+TATGAGCCTGAGAGTGCAATCGTAACCTACTCTCTTCCAGGAGAGCCTTCCTCTTCAACA\n+TGTGATGCGCGTAATTTTATTGGCTACGTCGATGAAAACGACTTTGCAGAGTATATAAAA\n+AAAGTAGAACTGACGCGCTACAGTATGTATTGTCTGAAAAAAACAGGGGCGGGTCAGTGG\n+AGTATTTATGCAAATTAAACGAATATATAACTGTTCAGGGGTGTTTTCTGCACAATTTAA\n+TTAAAGGTAGATAGCGAACTAATATTCATCATAAGGTAATGTGATCCGCATCATGCTTTA\n+ATCACGTTTTGCAATATTGTCTTAATAACAGGTCAACTACAGATAATTTCGGGTTTTCGT\n+GAATGGCGGTTTTGGTCAGTCTCTATATTATGCTTTCCCCGCTTTACGGAGATGATGATG\n+CAAAAGAAAACCTGGGCCGTTTTATTGATACCATTGTGCCTCACCGCTTGCCACCAGACG\n+ACCTACCGATCAACAGCCAAAGATGAAATTCTCACAGGGAGCTATATTGACCCGACCAAA\n+ACACGCTTTCCGTTAGCGGATTATTCACAGTCGGTTGATAAATGGATACCTCCGGACTCG\n+GCTGATTACACTATTCCGGTTATTGATAGTGCTACCCAACAACGCTACTTTAACGCACTA\n+AAATCTCATTATTTCGGAATGGACAGTGAAGCGCACTCGCCGTGGAATGGTTTTTATATT\n+ACCGCATTGCTGAAAAAAAACGCAGCACAGGCCCGGGATGCCAGTATTAAGCAGTTTCTT\n+AGCGACGGCAGTGCTTATTGGGGGGAAAACTTCAGACTTTATACCTCTCGCTGGAAAGAG\n+GAGGTTAGAGGGAATACGGATACGCAGATTGACAATATCTATAATGCCTCCCGGCGAGGC\n+ATTATGGTCAGAGAAAGTTTAGTCAGAGCGCTGCCAACGGACGACCCGCTCTTTAACGAT\n+CCCCGTCAGGCGGGCGAGGGGTATCCGTTTGATAATTTACAAATGTCTTCGCTGCGTCCC\n+GGTACGCCGGTCTATACGCTGACGAAGAGCAAAGACCAACGCTGGCAATATGTCGTTTCA\n+CCAGCGGTAACTGGCTGGGTTCATAGTGAAGATATTGCCAGTACGGATCAGAAATTTATA\n+ACACAGTGGGTTTTGCTCGCTCACAAGCAACTGGGGGCATTTAT
CAACGCGCCGGTTTCT\n+GTCCATGCCGCAGGCGTCTATTATTTCACCGGGCGGCCGGGCACTATTTTACCGTTCCGG\n+CATCAACGGGCGGGCCAGTTCCTCATTGCGGCGCCAGTTCGCGACAGCAACGGTCGCGCG\n+TTTATCCATTGGGTCTGGCTAAGCGGTAACGAGTTTACGGCTATGCCGTGGAAAATGACG\n+CCGGAAAATATCGCCGTGCTAATGAAAGCAATGCACGGCGCCCCCTACGGTTGGGGAAAT\n+TTTAACTTTTATAATGATTGTTCAGCCGAAGTCCGCAGCCTGTTAATGCCGTTTGGCATA\n+TTCCTGCCCAGACATTCATCCGCGCAGGTAGAGGCGGCTGGACGGGTGGTCGATCTTAGT\n+CATAAAAATCCCCAAATGCGGATCGATTATCTCACCAGATACGGAAAGCCGTTTACTACG\n+CTGGTCTATATTCCAGGGCATATTATGCTGTATATCGGTAACACAACCATGAATGGGCAG\n+GTTATGCCGATGACTTATCAGAATATCTGGGGATTGCGCCCAAACCATGCTAATAGCCGG\n+AGCATTATCGGTGAGGCAGTATTTTTGCCGTTACTGCGTTTTTATCCTGAAAATCCTGAG\n+CTGATATCGCTGGCTGGTAAGGTTCTGTTTAAGCTTGGTTATATAGAATAGGTTGACAAA\n+CTGTGTTATCGCGGAAGATATGATTAAGGCAACCGTGCGCGTGGATATAAAGAGAGTATC\n+CCTGTAAACTGTGTCTGAAAAAAGTATGCGCTATAGTGCGGGCTGTTTATTTGAAATACT\n+ATCTGGAGCTGATTCTGGATGTGATTATGTGCCTCACACATTGAAGTGAGAGTTATTGCT\n+GGTGGTATCTGGCTGGAGGCTGGGTGGTAAAAAGACGTAATTTATTGCATTTTAATAACG\n+GTGCCCGATCAGAGCGCGATCTTGGGGATCTGGTCACTAAAGTTTTTGAGAAAGCGGCGA\n+AAAAAGAACCGCAACCGCTATACACATTTAGCCTGCCGTTGCTGAGTGTGCAGGATGAGA\n+TCCGTGTTTACTGTAAGAAAAAAAATATCAAAATAGGGTACGATACGCTGTTTATGGAAA\n+TAACCTTTTCTGCTGACAGAGAAGCGGTAGACGAACTCATTAAACACTTTTTTACTGAAA\n+ATAAGCTCTATTTGAGAGGGCGGTTTTATCTGTCCGCGGCGGTGGTATAAGTTAGCATGA\n+GTATAATATAGCGAGAAACCCATCTCTCGCTTTTGTGTGGGGCGAGCGTTTCCTGGCGGA\n+AAATCTGCGCCCTGATCCTCCTTTTGTGCGTTAACTTCCGGAATCCTCCTCATTTCGTGT\n+TAATCTGGCCCCTGATGGTTGCCATCCAAACTGCCTGCAAAAAAATTATTTTTATCCGCT\n+GCAATGTAAGAGGCCTACAGGCGGAAGTCATCACTGAGGAAAAGCGTTATGAAAATGGAA\n+CCATTAAACGAGAACGAGCTGGAATGGCTGGATGATGTGCTGACGAAATACAATACCGAT\n+CAGGCCATTTTGGATGTCGCGGAGCTGGACGGTCTGATTACTGCGGTACTAAGTTCTCCG\n+CGTCCCATCGAGCCGGAACAGTGGCTGGTCGCGATATGGGGGGGACCCGCGTACGTACCG\n+CGCTGGACATCTGAAAAAGAAATGACGCGATTTATGGATCTGGTGTTCCAGCACATGGCG\n+GACACAGCCGCCCGGCTTGAAGATTATCCAGAACAGTTCGAGCCGCTGTTTGGCCTGCGA\n+GAAGTTGATGGCCATGAATTAACGATTGTTGAAGAATGGTGCTTTGGTTACATGCGCGGA\n+GTGTCGCTGTCCGACTGGTCTGACTTGCCGGATACGCTAAAACCGGCGCTGGAAGCGATT\n+GCCTTACACGGCACTGAAGAAAACTTTG
CGCTGTTGGATAAAATGAGCCCGGAAGCGTTC\n+GATAAAAGCGTTGACGCTATCCGTATCGCGG'..b'GATGGCGCAGCCCGAGACGATAAGTTCGCTTACCGGCTCGAATAAAGA\n+GAGTTTCTCTCGATATTCAGTGCAGAATGAAAATCAGGTAGCCGAGTTCCAGGATGCGGG\n+CATCGTATAATGGCTATTACCTCAGCCTTCCAAGCTGATGATGCGGGTTCGATTCCCGCT\n+GCCCGCTCCAAGATGTGCTGATATAGCTCAGTTGGTAGAGCGCACCCTTGGTAAGGGTGA\n+GGTCGGCAGTTCGAATCTGCCTATCAGCACCACTTCTTTTCTCCTTCCTGTTTTTCTCTT\n+CTGTTTATTAGCATTCAACAAGTCGGGCGTGTTGCCTGGTTGATGTGGTGATATCACCGA\n+TTTATCCGTGTCTTAGAGGGACAATCGATGTCTAAAGAAAAGTTTGAACGTACAAAACCG\n+CACGTTAACGTCGGTACTATCGGCCACGTTGACCATGGTAAAACAACGCTGACCGCTGCC\n+ATTACTACCGTACTGGCTAAAACCTACGGCGGTGCCGCCCGCGCATTCGACCAGATCGAT\n+AACGCGCCGGAAGAAAAAGCGCGTGGTATCACCATCA\n+>SRR6126859_37 length_2182_cov_50.1815\n+CACTTTCCTCCCGCCGTCCAGGTCCGACACCACGACCACGTCAGACTCATGAGAAAGTAC\n+ATACAGGAGCCCGTTGTTCCGGTCATAATGCAGGCCGGAGATATCCTTCAGGAACCATGG\n+CAGGGAGGCCCGGTGTGTCAGCGAGTCACTCACCGACAGCGCATCAGGGCTCCGGTCCGT\n+CACGTAAAGCCGCAGCGGTTTTTTCTCCTGCGCCACCATCAGGGCATGCTCTCCCCGTCC\n+CCGGGCAAGGCCTTCAAAACCGGCATTGTCGCTGTGCCTGTCCACATCCAGCGTCAGGCT\n+GTATGTTGCCGGCGGGAGGACGGTCGTGCTGCTGTCAATGCAGTGGGTGGTGAGCGTCCG\n+CTCCCGCTCCCGGGACAGGGCATAGCGGTTCCCGCCGAGGTATTCGATGGCCTCAAAATC\n+GTGGTCGCCGTCTGACGGTATCACCCGCAGCACGTTCCCTTCCGTGTCCAGCTCCACCAC\n+CGAGGAGGGGTGGTCGGTCACGGCAAACAGCGTTCTGCTGTCCGGGTTCCATGTCAGGCC\n+GGAGAGGCCGCCGTGTATCCCGGTAATTTCTTTTTCTGCCGTGAGCTGCCATTCTGTTTC\n+TTGCGGGTACCAGGACTCCGGGAAACACAGGGTGAGCACGCCGGCCGCCATCAGGAGAAT\n+GGCGGTCAGCAGTGCTGCTTTCTGGTTACATAAATATCTGAACATCGTGGTGGTGAAAGT\n+TATGTTTTCTGTTACAGGGGAGACCGGACTTCACATCATGCCCCACGGCAGATAAGCCAG\n+AGCATGGCCAGGACGCCGGCCAGAAAGCCGGAGATAAAGACACTGGTCAGCAGCAGGAGA\n+ATCAGCGCGCGGAGCGGGGGGATATCTCCGGCCTGTATGGTGCCTTTATCATCAGTCGGC\n+ATCAGGGTTACCGGCCAGCAGTTTCCGGACCACCGCAGCATAACGGCTCCTGAAGTCTTC\n+GACGGAGAATGCGCTGAAGGCGGCATTGTTGATGTGGTAACGGCCACGGACATACACGGA\n+AGGCGTCCCCCTCACGCCATATTCCTTAAACAGCCGTTCCTGTAATGCCACCATGTCATT\n+CACGGCGGGTCTTTTTATACTTCTGTCATACTCCGCGCGACTGATACCGGTGGCAGACAT\n+AAACACCCGGCGGACATCGTCCGGGGAATGCAGGCGTTTCTCGACCATGCCGGCCGTGAA\n+GAAGGCCTTCTCGACCACGTCGGTTTCCTTCAT
CACCATGGCCAGCGCCCAGGCCCGTGT\n+CAGCTCATGGCCGAGGGGGCCCAGCAGGCTGACATGGTACTTGACCATCCGGTCACCTTG\n+CGGCAGTACGTGCCGGATGGCCTGGTCCACGCCCATTGTCTGTGAGAACGCATAGCAGGG\n+CGGGCAGTAAAAGGAAAAGAACTCCACCACAGCGGGGGCATCAGCCACCGGAGGCGTTAT\n+GGACTCCCACTCCTGTGCAACAGCAGGGCGTGCACAACAAAACAACAGCAAAGAAGAACA\n+TAATATTCCCTTCAGGGAATACAGATTCCGGGCATAATTCATCGTCACGGAAACACTCCA\n+TGGATAACAGATATACAGAACAAAAACCGATTAACAGCACCACCTGACAGCAACAGTAAA\n+TATTAATTCATCCGGCATGGTCATTAAGGCGGCAGCCGGAATATTCCGGTCATGAATTTT\n+CCAGAATAAAAAGCACTGACACACAAAAGACCACAGCCATTAATATGACGACGGCCAGAC\n+AGGCGTAATAAATAAATTTCATGAGCATCATAAAATAACTTTCAGTGATACTTATCCGGC\n+CTCCGGCCCGCATATCGGGCCGTCCTCTTCCAGTGAATGAACCCGGTTCAGGTCGGTGTA\n+CAGTGTCACCATCCCCAGCAGTATCAGGGTCGGCAACAGAGTGGCATCATTCCTGACGCC\n+GAGGCGCCTGAAAGCCTGGCCTTTCTGGGCTGAAATGGTTTTGACGCTGCGAAAACAGAG\n+GGCAGCAATCTGCCCGCTGTTATAGCCCCGCAGCAGCAGTCGGATAACCCGCAGTTCAGC\n+CGGACTCAGTCCGCCGTCGCTGTTAAATTCGATACAAAGCCGGTCGTACCATATCCGGCC\n+GAAATCCCCGTCTTCCGGGAAAGCGGTCAGAGCAGACGCCATAATCCGGGTACTCCCTTT\n+CCGGCATTCACTTTCTCAAGCA\n+>SRR6126859_39 
length_1043_cov_65.8816\n+GCATTCGACCAGATCGATAACGCGCCGGAAGAAAAAGCGCGTGGTATCACCATCAACACT\n+TCTCACGTTGAATACGATACCCCGACCCGCCACTACGCACACGTAGACTGCCCGGGGCAC\n+GCCGACTATGTTAAAAACATGATCACCGGTGCTGCTCAGATGGACGGCGCGATTCTGGTT\n+GTTGCTGCGACTGACGGCCCGATGCCGCAGACCCGTGAGCACATCCTGCTGGGTCGTCAG\n+GTAGGCGTTCCGTACATCATCGTGTTCCTGAACAAATGCGACATGGTTGATGACGAAGAG\n+CTGCTGGAACTGGTTGAAATGGAAGTTCGTGAACTTCTGTCTCAGTACGACTTCCCGGGC\n+GACGACACTCCGATCGTTCGTGGTTCTGCTCTGAAAGCGCTGGAAGGCGACGCAGAGTGG\n+GAAGCGAAAATCATCGAACTGGCTGGCTTCCTGGATTCTTACATCCCGGAACCAGAGCGT\n+GCGATTGACAAGCCGTTCCTGCTGCCGATCGAAGACGTATTCTCCATCTCCGGTCGTGGT\n+ACCGTTGTTACCGGTCGTGTAGAACGCGGTATCATCAAAGTGGGCGAAGAAGTTGAAATC\n+GTTGGTATCAAAGAGACTCAGAAGTCTACCTGTACTGGCGTTGAAATGTTCCGCAAACTG\n+CTGGACGAAGGCCGTGCCGGTGAGAACGTAGGTGTTCTGCTGCGTGGTATCAAACGTGAA\n+GAAATCGAACGTGGTCAGGTACTGGCTAAGCCGGGCACCATCAAGCCGCACACCAAGTTC\n+GAATCTGAAGTGTACATTCTGTCCAAAGATGAAGGCGGCCGTCACACTCCGTTCTTCAAA\n+GGCTACCGTCCGCAGTTCTACTTCCGTACTACTGACGTGACTGGCACCATCGAACTGCCG\n+GAAGGCGTAGAGATGGTAATGCCGGGCGACAACATCAAAATGGTTGTTACCCTGATCCAC\n+CCGATCGCAATGGACGACGGTCTGCGTTTCGCAATCCGTGAAGGCGGCCGTACCGTTGGC\n+GCGGGCGTTGTTGCTAAAGTTCT\n'
b
diff -r 000000000000 -r 90957420cc07 test-data/Filtered_sequences_(with_repeats).fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Filtered_sequences_(with_repeats).fasta Thu Oct 12 15:04:45 2017 -0400
b
b'@@ -0,0 +1,77945 @@\n+>SRR6126859_1 length_852517_cov_27.8446\n+GAGCACGACGTTGCCAAGGTCGGGGTCGCGAGTTCGAGTCTCGTTTCCCGCTCCAGTTTA\n+AAAGCATTGGCGTAAAGCGGATGTTGGCTGAAAAGCCAAAAAATTTGGCGCGTTAACAAA\n+GCGGTTATGTAGCGGATTGCAAATCCGTCTAGTCCGGTTCGACTCCGGAACGCGCCTCCA\n+ATTTTTCCCGAGCCCGGATGGTGGAATCGGTAGACACAAGGGATTTAAAATCCCTCGGCG\n+TTCGCGCTGTGCGGGTTCAAGTCCCGCTCCGGGTACCATGGGAAAGACAAGAATAATCAA\n+AGCAATAAGCAGTGTCGTGAAACCACCTACGGGTGGTTTTTTTGTGCCTTCAATTTGTAT\n+GCCGACCGCGAGCGTGAACTCTCCAGTGCGATACACAGTAAAAAACACCGGTAGCGGCAT\n+TTCTGCCAGTACAACAGTTATGTGCTACCGACCCGCAAGAACTTATGCCCGGTAAATACT\n+AAACGTCGGGTAAATCAGCCGATAAAATCCATTAATCTGTATAAAAATGCTGATATGTTT\n+TATGCCTGCTTTATTAAAGAGAGTAGGTCTGCCAGTGAAATATACCGATAATGCAAAATT\n+ATTTATTAGCTTCATTAATATTGGGATTGTAGGTTGCGCTTTTTTACTTTTTTTATTTTG\n+TATGAGCCTGAGAGTGCAATCGTAACCTACTCTCTTCCAGGAGAGCCTTCCTCTTCAACA\n+TGTGATGCGCGTAATTTTATTGGCTACGTCGATGAAAACGACTTTGCAGAGTATATAAAA\n+AAAGTAGAACTGACGCGCTACAGTATGTATTGTCTGAAAAAAACAGGGGCGGGTCAGTGG\n+AGTATTTATGCAAATTAAACGAATATATAACTGTTCAGGGGTGTTTTCTGCACAATTTAA\n+TTAAAGGTAGATAGCGAACTAATATTCATCATAAGGTAATGTGATCCGCATCATGCTTTA\n+ATCACGTTTTGCAATATTGTCTTAATAACAGGTCAACTACAGATAATTTCGGGTTTTCGT\n+GAATGGCGGTTTTGGTCAGTCTCTATATTATGCTTTCCCCGCTTTACGGAGATGATGATG\n+CAAAAGAAAACCTGGGCCGTTTTATTGATACCATTGTGCCTCACCGCTTGCCACCAGACG\n+ACCTACCGATCAACAGCCAAAGATGAAATTCTCACAGGGAGCTATATTGACCCGACCAAA\n+ACACGCTTTCCGTTAGCGGATTATTCACAGTCGGTTGATAAATGGATACCTCCGGACTCG\n+GCTGATTACACTATTCCGGTTATTGATAGTGCTACCCAACAACGCTACTTTAACGCACTA\n+AAATCTCATTATTTCGGAATGGACAGTGAAGCGCACTCGCCGTGGAATGGTTTTTATATT\n+ACCGCATTGCTGAAAAAAAACGCAGCACAGGCCCGGGATGCCAGTATTAAGCAGTTTCTT\n+AGCGACGGCAGTGCTTATTGGGGGGAAAACTTCAGACTTTATACCTCTCGCTGGAAAGAG\n+GAGGTTAGAGGGAATACGGATACGCAGATTGACAATATCTATAATGCCTCCCGGCGAGGC\n+ATTATGGTCAGAGAAAGTTTAGTCAGAGCGCTGCCAACGGACGACCCGCTCTTTAACGAT\n+CCCCGTCAGGCGGGCGAGGGGTATCCGTTTGATAATTTACAAATGTCTTCGCTGCGTCCC\n+GGTACGCCGGTCTATACGCTGACGAAGAGCAAAGACCAACGCTGGCAATATGTCGTTTCA\n+CCAGCGGTAACTGGCTGGGTTCATAGTGAAGATATTGCCAGTACGGATCAGAAATTTATA\n+ACACAGTGGGTTTTGCTCGCTCACAAGCAACTGGGGGCATTTAT
CAACGCGCCGGTTTCT\n+GTCCATGCCGCAGGCGTCTATTATTTCACCGGGCGGCCGGGCACTATTTTACCGTTCCGG\n+CATCAACGGGCGGGCCAGTTCCTCATTGCGGCGCCAGTTCGCGACAGCAACGGTCGCGCG\n+TTTATCCATTGGGTCTGGCTAAGCGGTAACGAGTTTACGGCTATGCCGTGGAAAATGACG\n+CCGGAAAATATCGCCGTGCTAATGAAAGCAATGCACGGCGCCCCCTACGGTTGGGGAAAT\n+TTTAACTTTTATAATGATTGTTCAGCCGAAGTCCGCAGCCTGTTAATGCCGTTTGGCATA\n+TTCCTGCCCAGACATTCATCCGCGCAGGTAGAGGCGGCTGGACGGGTGGTCGATCTTAGT\n+CATAAAAATCCCCAAATGCGGATCGATTATCTCACCAGATACGGAAAGCCGTTTACTACG\n+CTGGTCTATATTCCAGGGCATATTATGCTGTATATCGGTAACACAACCATGAATGGGCAG\n+GTTATGCCGATGACTTATCAGAATATCTGGGGATTGCGCCCAAACCATGCTAATAGCCGG\n+AGCATTATCGGTGAGGCAGTATTTTTGCCGTTACTGCGTTTTTATCCTGAAAATCCTGAG\n+CTGATATCGCTGGCTGGTAAGGTTCTGTTTAAGCTTGGTTATATAGAATAGGTTGACAAA\n+CTGTGTTATCGCGGAAGATATGATTAAGGCAACCGTGCGCGTGGATATAAAGAGAGTATC\n+CCTGTAAACTGTGTCTGAAAAAAGTATGCGCTATAGTGCGGGCTGTTTATTTGAAATACT\n+ATCTGGAGCTGATTCTGGATGTGATTATGTGCCTCACACATTGAAGTGAGAGTTATTGCT\n+GGTGGTATCTGGCTGGAGGCTGGGTGGTAAAAAGACGTAATTTATTGCATTTTAATAACG\n+GTGCCCGATCAGAGCGCGATCTTGGGGATCTGGTCACTAAAGTTTTTGAGAAAGCGGCGA\n+AAAAAGAACCGCAACCGCTATACACATTTAGCCTGCCGTTGCTGAGTGTGCAGGATGAGA\n+TCCGTGTTTACTGTAAGAAAAAAAATATCAAAATAGGGTACGATACGCTGTTTATGGAAA\n+TAACCTTTTCTGCTGACAGAGAAGCGGTAGACGAACTCATTAAACACTTTTTTACTGAAA\n+ATAAGCTCTATTTGAGAGGGCGGTTTTATCTGTCCGCGGCGGTGGTATAAGTTAGCATGA\n+GTATAATATAGCGAGAAACCCATCTCTCGCTTTTGTGTGGGGCGAGCGTTTCCTGGCGGA\n+AAATCTGCGCCCTGATCCTCCTTTTGTGCGTTAACTTCCGGAATCCTCCTCATTTCGTGT\n+TAATCTGGCCCCTGATGGTTGCCATCCAAACTGCCTGCAAAAAAATTATTTTTATCCGCT\n+GCAATGTAAGAGGCCTACAGGCGGAAGTCATCACTGAGGAAAAGCGTTATGAAAATGGAA\n+CCATTAAACGAGAACGAGCTGGAATGGCTGGATGATGTGCTGACGAAATACAATACCGAT\n+CAGGCCATTTTGGATGTCGCGGAGCTGGACGGTCTGATTACTGCGGTACTAAGTTCTCCG\n+CGTCCCATCGAGCCGGAACAGTGGCTGGTCGCGATATGGGGGGGACCCGCGTACGTACCG\n+CGCTGGACATCTGAAAAAGAAATGACGCGATTTATGGATCTGGTGTTCCAGCACATGGCG\n+GACACAGCCGCCCGGCTTGAAGATTATCCAGAACAGTTCGAGCCGCTGTTTGGCCTGCGA\n+GAAGTTGATGGCCATGAATTAACGATTGTTGAAGAATGGTGCTTTGGTTACATGCGCGGA\n+GTGTCGCTGTCCGACTGGTCTGACTTGCCGGATACGCTAAAACCGGCGCTGGAAGCGATT\n+GCCTTACACGGCACTGAAGAAAACTTTG
CGCTGTTGGATAAAATGAGCCCGGAAGCGTTC\n+GATAAAAGCGTTGACGCTATCCGTATCGCGG'..b'CCTTAAACAGCCGTTCCTGTAATGCCACCATGTCATT\n+CACGGCGGGTCTTTTTATACTTCTGTCATACTCCGCGCGACTGATACCGGTGGCAGACAT\n+AAACACCCGGCGGACATCGTCCGGGGAATGCAGGCGTTTCTCGACCATGCCGGCCGTGAA\n+GAAGGCCTTCTCGACCACGTCGGTTTCCTTCATCACCATGGCCAGCGCCCAGGCCCGTGT\n+CAGCTCATGGCCGAGGGGGCCCAGCAGGCTGACATGGTACTTGACCATCCGGTCACCTTG\n+CGGCAGTACGTGCCGGATGGCCTGGTCCACGCCCATTGTCTGTGAGAACGCATAGCAGGG\n+CGGGCAGTAAAAGGAAAAGAACTCCACCACAGCGGGGGCATCAGCCACCGGAGGCGTTAT\n+GGACTCCCACTCCTGTGCAACAGCAGGGCGTGCACAACAAAACAACAGCAAAGAAGAACA\n+TAATATTCCCTTCAGGGAATACAGATTCCGGGCATAATTCATCGTCACGGAAACACTCCA\n+TGGATAACAGATATACAGAACAAAAACCGATTAACAGCACCACCTGACAGCAACAGTAAA\n+TATTAATTCATCCGGCATGGTCATTAAGGCGGCAGCCGGAATATTCCGGTCATGAATTTT\n+CCAGAATAAAAAGCACTGACACACAAAAGACCACAGCCATTAATATGACGACGGCCAGAC\n+AGGCGTAATAAATAAATTTCATGAGCATCATAAAATAACTTTCAGTGATACTTATCCGGC\n+CTCCGGCCCGCATATCGGGCCGTCCTCTTCCAGTGAATGAACCCGGTTCAGGTCGGTGTA\n+CAGTGTCACCATCCCCAGCAGTATCAGGGTCGGCAACAGAGTGGCATCATTCCTGACGCC\n+GAGGCGCCTGAAAGCCTGGCCTTTCTGGGCTGAAATGGTTTTGACGCTGCGAAAACAGAG\n+GGCAGCAATCTGCCCGCTGTTATAGCCCCGCAGCAGCAGTCGGATAACCCGCAGTTCAGC\n+CGGACTCAGTCCGCCGTCGCTGTTAAATTCGATACAAAGCCGGTCGTACCATATCCGGCC\n+GAAATCCCCGTCTTCCGGGAAAGCGGTCAGAGCAGACGCCATAATCCGGGTACTCCCTTT\n+CCGGCATTCACTTTCTCAAGCA\n+>SRR6126859_38 length_1598_cov_94.5224(2 
copies)\n+TCCATGAGCGCCACCTACGGCCACCCGGCCACCGAAGCGCTGGTGGCGACGTTGGCAGGT\n+ACTGAGCATGACACCGGGCTGGATATCCTGAAGCTGGAAAATATCGCCGCGTACTTCCGC\n+GAGGTGCGCAAAAAGTACCACGCCTTTGAAGGCCAGCTGAAAGGCTACGACAGCCGCATC\n+CTGGTGGCGCAGGTGCCGGGCGGAATGCTCACTAACCTCGAAAGCCAGCTGAAGCAGCAG\n+AACGCGGCGGACAAACTCGACCAGGTGCTGGCGGAAATCCCCCGCGTGCGCGAGGACCTC\n+GGCTTTATTCCGCTGGTGACCCCCACCTCACAGATTGTCGGCACCCAGGCGGTGCTCAAC\n+GTGCTGACGGGCGAACGCTACAAGACCATTGCCAAAGAAACGGCGGGCATTCTGAAAGGC\n+GAATACGGCCACACCCCGGTGCCGGTGAACGCCGCGTTACAGGCCCGCGTGCTGGAAGGA\n+GGCGCTCCGGTGACCTGCCGCCCGGCAGACCTGCTGAAGCCGGAACTGGCTGAACTGGAA\n+GCGGACGTCAGGCGCCAGGCGCAGGAGAAGGGGATTCAGCTTGCGGGAAACGCCATCGAC\n+GACGTGCTCACCGTGGCGCTGTTCCCGCAAATCGGCCTCAAATTCCTCGAAAACCGCCAC\n+AACCCGGCGGCGTTTGAGCCACTGCCGCAGGCGGAAGCCGCGCAGCCGGTGACAAAAGCA\n+GAGAAGCCTGCCGCTTCCGGTATCTACACCGTGGAAGTGGAAGGCAAAGCCTTTGTGGTG\n+AAGGTCAGCGACGGCGGCGATATCAGCCAGCTCACTGCGGCTGCACCTGCTGCCTCTTCT\n+GCTCCTGCCACCGCCCCGGCAGGCGCCGGCACCCCGGTCACCGCGCCGCTGGCGGGCAAC\n+ATCTGGAAGGTGATTGCCACCGAAGGCCAGACAGTGGCCGAAGGCGATGTGCTGCTGATT\n+CTGGAAGCCATGAAGATGGAAACCGAAATCCGCGCCGCGCAGGCCGGGACGGTACGCGGT\n+ATCGCGGTGAAGTCCGGGGACGCGGTTTCCGTGGGCGACACCCTGATGACGCTGGCGTAA\n+CGGAGAACTGAAATGGAAAGTCTGAACGCCCTGCTTCAGGGCATGGGGCTGATGCACCTT\n+GGCGCAGGCCAGGCCATCATGCTGCTGGTCAGCCTGCTGCTGCTGTGGCTGGCGATTGCG\n+AAAAAGTTCGAACCGTTACTGCTGCTGCCGATTGGCTTCGGCGGCCTGCTCTCCAACATC\n+CCGGAAGCGGGTATGGCGCTGACCGCGCTGGAGAGCCTGCTGGCGCATCACGACGCCGGG\n+CAGCTGGCGGTGATTGCCGCGAAGCTTAACTGCGCGCCGGACGTGCACGCCATTAAAGAG\n+GCATTAGCGCTGGCGCTGCCGTCGGTGCAGAGCCAGATGGAGAACCTGGCGGTGGACATG\n+GGCTACACGCCGGGGGTGCTGGCGCTGTTCTATAAAGTGGCGATTGGCTCCGGCGTCGCG\n+CCGCTGGTCATCTTCATGGGCGTCGGCGCGATGACCGACTTCGGCCCGCTGCTGGCCAAC\n+CCGCGCACCCTGCTGCTGGGGGCGGCGGCGCAGTTCGG\n+>SRR6126859_39 
length_1043_cov_65.8816\n+GCATTCGACCAGATCGATAACGCGCCGGAAGAAAAAGCGCGTGGTATCACCATCAACACT\n+TCTCACGTTGAATACGATACCCCGACCCGCCACTACGCACACGTAGACTGCCCGGGGCAC\n+GCCGACTATGTTAAAAACATGATCACCGGTGCTGCTCAGATGGACGGCGCGATTCTGGTT\n+GTTGCTGCGACTGACGGCCCGATGCCGCAGACCCGTGAGCACATCCTGCTGGGTCGTCAG\n+GTAGGCGTTCCGTACATCATCGTGTTCCTGAACAAATGCGACATGGTTGATGACGAAGAG\n+CTGCTGGAACTGGTTGAAATGGAAGTTCGTGAACTTCTGTCTCAGTACGACTTCCCGGGC\n+GACGACACTCCGATCGTTCGTGGTTCTGCTCTGAAAGCGCTGGAAGGCGACGCAGAGTGG\n+GAAGCGAAAATCATCGAACTGGCTGGCTTCCTGGATTCTTACATCCCGGAACCAGAGCGT\n+GCGATTGACAAGCCGTTCCTGCTGCCGATCGAAGACGTATTCTCCATCTCCGGTCGTGGT\n+ACCGTTGTTACCGGTCGTGTAGAACGCGGTATCATCAAAGTGGGCGAAGAAGTTGAAATC\n+GTTGGTATCAAAGAGACTCAGAAGTCTACCTGTACTGGCGTTGAAATGTTCCGCAAACTG\n+CTGGACGAAGGCCGTGCCGGTGAGAACGTAGGTGTTCTGCTGCGTGGTATCAAACGTGAA\n+GAAATCGAACGTGGTCAGGTACTGGCTAAGCCGGGCACCATCAAGCCGCACACCAAGTTC\n+GAATCTGAAGTGTACATTCTGTCCAAAGATGAAGGCGGCCGTCACACTCCGTTCTTCAAA\n+GGCTACCGTCCGCAGTTCTACTTCCGTACTACTGACGTGACTGGCACCATCGAACTGCCG\n+GAAGGCGTAGAGATGGTAATGCCGGGCGACAACATCAAAATGGTTGTTACCCTGATCCAC\n+CCGATCGCAATGGACGACGGTCTGCGTTTCGCAATCCGTGAAGGCGGCCGTACCGTTGGC\n+GCGGGCGTTGTTGCTAAAGTTCT\n'
b
diff -r 000000000000 -r 90957420cc07 test-data/Repeat_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Repeat_sequences.fasta Thu Oct 12 15:04:45 2017 -0400
b
b'@@ -0,0 +1,194 @@\n+>SRR6126859_34 length_5115_cov_198.272(5 copies)\n+TCTCATCCGCCAAAACATCTTCGGCGTTGTAAGGTTAAGCCTCACGGTTCATTAGTACCG\n+GTTAGCTCAACGCATCGCTGCGCTTACACACCCGGCCTATCAACGTCGTCGTCTTCAACG\n+TTCCTTCAGGAGACTTTAAGTCTCAGGGAGAACTCATCTCGGGGCAAGTTTCGTGCTTAG\n+ATGCTTTCAGCACTTATCTCTTCCGCATTTAGCTACCGGGCAGTGCCATTGGCATGACAA\n+CCCGAACACCAGTGATGCGTCCACTCCGGTCCTCTCGTACTAGGAGCAGCCCCCCTCAGT\n+TCTCCAGCGCCCACGGCAGATAGGGACCGAACTGTCTCACGACGTTCTAAACCCAGCTCG\n+CGTACCACTTTAAATGGCGAACAGCCATACCCTTGGGACCTACTTCAGCCCCAGGATGTG\n+ATGAGCCGACATCGAGGTGCCAAACACCGCCGTCGATATGAACTCTTGGGCGGTATCAGC\n+CTGTTATCCCCGGAGTACCTTTTATCCGTTGAGCGATGGCCCTTCCATTCAGAACCACCG\n+GATCACTATGACCTGCTTTCGCACCTGCTCGCGCCGTCACGCTCGCAGTCAAGCTGGCTT\n+ATGCCATTGCACTAACCTCCTGATGTCCGACCAGGATTAGCCAACCTTCGTGCTCCTCCG\n+TTACTCTTTAGGAGGAGACCGCCCCAGTCAAACTACCCACCAGACACTGTCCGCAACCCG\n+GGTAACGGGTCCACGTTAGAACATCAAACATTAAAGGGTGGTATTTCAAGGTCGGCTCCA\n+TGCAGACTGGCGTCCACACTTCAAAGCCTCCCACCTATCCTACACATCAAGGCTCAATGT\n+TCAGTGTCAAGCTATAGTAAAGGTTCACGGGGTCTTTCCGTCTTGCCGCGGGTACACTGC\n+ATCTTCACAGCGAGTTCAATTTCACTGAGTCTCGGGTGGAGACAGCCTGGCCATCATTAC\n+GCCATTCGTGCAGGTCGGAACTTACCCGACAAGGAATTTCGCTACCTTAGGACCGTTATA\n+GTTACGGCCGCCGTTTACCGGGGCTTCGATCAGGAGCTTCGCTTGCGCTGACCCCATCAA\n+TTAACCTTCCGGCACCGGGCAGGCGTCACACCGTATACGTCCACTTTCGTGTTTGCACAG\n+TGCTGTGTTTTTAATAAACAGTTGCAGCCAGCTGGTATCTTCGACTGACTTCAGCTCCAT\n+GAGTAAATCACTTCACCTACGTGTCAGCGTGCCTTCTCCCGAAGTTACGGCACCATTTTG\n+CCTAGTTCCTTCACCCGAGTTCTCTCAAGCGCCTTGGTATTCTCTACCTGACCACCTGTG\n+TCGGTTTGGGGTACGATTTGATGTTACCTGATGCTTAGAGGCTTTTCCTGGAAGCAGGGC\n+ATTTGTTGCTTCAGCACCGTAGTGCCTCGTCGTCACGCCTCAGTGTTAAAGTGAACCGGA\n+TTTACCTGGAACACACACCTACACGCTTAAACCGGGACAACCGTCGCCCGGCCAACATAG\n+CCTTCTCCGTCCCCCCTTCGCAGTAACACCAAGTACGGGAATATTAACCCGTTTCCCATC\n+GACTACGCCTTTCGGCCTCGCCTTAGGGGTCGACTCACCCTGCCCCGATTAACGTTGGAC\n+AGGAACCCTTGGTCTTCCGGCGAGCGGGCTTTTCACCCGCTTTATCGTTACTTATGTCAG\n+CATTCGCACTTCTGATACCTCCAGCAACCCTCACAGGCCACCTTCGCAGGCTTACAGAAC\n+GCTCCCCTACCCAACAACACACAGTGTCGCTGCCGCAGCTTCGGTGCATGGTTTAGCCCC\n+GTTACATCTTCCGCGCAGGCCGACTCGACCAGTGAGC
TATTACGCTTTCTTTAAATGATG\n+GCTGCTTCTAAGCCAACATCCTGGCTGTCTGGGCCTTCCCACATCGTTTCCCACTTAACC\n+ATGACTTTGGGACCTTAGCTGGCGGTCTGGGTTGTTTCCCTCTTCACGACGGACGTTAGC\n+ACCCGCCGTGTGTCTCCCGTGATAACATTCTCCGGTATTCGCAGTTTGCATCGGGTTGGT\n+AAGCCGGGATGGCCCCCTAGCCGAAACAGTGCTCTACCCCCGGAGATGAATTCACGAGGC\n+GCTACCTAAATAGCTTTCGGGGAGAACCAGCTATCTCCCGGTTTGATTGGCCTTTCACCC\n+CCAGCCACAGGTCATCCGCTAATTTTTCAACATTAGTCGGTTCGGTCCTCCAGTTAGTGT\n+TACCCAACCTTCAACCTGCCCATGGCTAGATCACCGGGTTTCGGGTCTATACCCTGCAAC\n+TTAACGCCCGGTTAAGACTCGGTTTCCCTCCGGCTCCCCTATTCGGTTAACCTTGCTACA\n+GAATATAAGTCGCTGACCCATTATACAAAAGGTACGCAGTCACCCNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNCTTTTCATCAGACAATCTGTGTGAGCACTGCAAAGTACGCTT\n+CTTTAAGGTAAGGAGGTGATCCAACCGCAGGTTCCCCTACGGTTACCTTGTTACGACTTC\n+ACCCCAGTCATGAATCACAAAGTGGTAAGCGCCCTCCCGAAGGTTAAGCTACCTACTTCT\n+TTTGCAACCCACTCCCATGGTGTGACGGGCGGTGTGTACAAGGCCCGGGAACGTATTCAC\n+CGTGGCATTCTGATCCACGATTACTAGCGATTCCGACTTCATGGAGTCGAGTTGCAGACT\n+CCAATCCGGACTACGACGCACTTTATGAGGTCCGCTTGCTCTCGCGAGGTCGCTTCTCTT\n+TGTATGCGCCATTGTAGCACG
TGTGTAGCCCTGGTCGTAAGGGCCATGATGACTTGACGT\n+CATCCCCACCTTCCTCCAGTTTAT'..b'TATGACGGAACTGGAAGGGCATTTGCTGAACGCC\n+TTAGAGCATTTGCAACAGGACTATATGCGGCGGCTGAACGAGTGGGAGAGCGCCTTCGCA\n+GAATTGCAGAAGATGCACGCGGTTACGCAACAGAACAACGCGATACTGAACGAGCGTGTC\n+GTGAACTTGAGTCAGCAGGTGCAGCTATTAGCCGGACAAGTCGACCGCTTGAGTCGGTTA\n+TTTATCACGAACAACAGATAAGGGATGAACAGGAGCGTCAGGCGCAGGAGCTTGCCCGGC\n+GCTGGCATGAAGAGCAGGAACCCGAAGCGCGGATCTGGCGTGGCCCGTCAATGGGGATGT\n+GACCGGGGTACCCCCTGAAATCGAGCCAGTTTGTGGCACGCCCTACCCGGATGGCGCCAA\n+TGTTAATGTACGTACATTAGCATTAACAATATCCAAACCTCCCCATTTTGCCCCGTTCGA\n+CGGCTGGAACGTGCGAAACTGATCGTCATTCAGTTTCGCGCATAACTATGCATGAGGTTA\n+AAATTTACCAGGCGCGATCGCGGCAGTTTTTCGGGTGGTTTGTTGCTGTTTTTACCTGTC\n+TGCTGCCGTGATCGCGCTGAACGCGTTTAAGCGGTACGCGCAATGCGACGTTATGGTAAA\n+TACGGAGTATTTTAATTGCGGGCCTTTGTTGTATATCTCTGTCGTTTGTCCCCGGGGGGA\n+AGCATTACTGCTAACAAGTCCCGTGAGAAAACTGCGCAATCGTATTGCGCACCATGAACC\n+TATTCTCAACCGCAATCTGGAGGATGATTTTGCTACGATAAAGAGAATTATCGCGTATCG\n+GTGTCAGCATTCATTGGAGTGGATGCTGAAAAACCAGGTGTTACTCCCTCTACTTACACT\n+TAAACCTCTCTAAAAATATAAGTAAGTACTATCGAGGGCTATCATGAACTATGGTGCCCT\n+CTGAGCATAGCTACCCATTACTTCTTCTATGTCATTGAATAATTCATTTCTTAACTCTAA\n+GTATTCTAGTAACTCCATTTCTAATGACTCTGTCGTGAACTTAAATCTAGAAGGATCTTT\n+GATTGAGTTTACTAGTCTTGATAACGCTCTATATGTTTTGAGTGAAGGTTGCATTATAGA\n+TGGAAAGTATTTGAAAACCTCTTCATCTCTGTTGAAATTAACTAAAAACTTGTAGTCATC\n+ATTTATAAATCTCAAGCATACAGCGTCAAGCTTATCGCCTTTTCTTATTATTATTTCTTT\n+ATAGGAAAGATATTTTTCAGAAGATAAGTCCCACATCAAGCGAAAATCTCTCTTTGCGAA\n+CGAGTTCGCGTCATCATTGAAAAGAGATGATGTGATGGCTTTGGATAGACCTTCTGTGTT\n+TATCTCTTTGTTGGATGGGTAGTAAATTTTATTAATTGCTTTCATTCTCTCAAATGATGA\n+ATATAGGTCATTATGCCACCTTGTTGAAATCACTCCTTTAGTTATATTCTCCTTGCTAAA\n+ATTAAGCCCTGTATCAAAAATAAATGCAGCCATCAGTGAATATGATATGTTTTTTATTAA\n+CTCAGACTTAAATGGTTCCGCTATGGAGTAGTACAGTGATAAAATAACTAAAAATAAAAG\n+GTATTGTCTTACCTTTCCCGATGAGTATATGTAAAGGCGATATTTATTTATTGATAGTTT\n+TATTTTGTATGGAATGATCTCATTTCTGTCATATCCTCTTCGTGGTGGAGCAAAACAAAA\n+GAAACATGCAGCTATTAATAAAACAAAAAAGATGAACAGTGTAAGCAAGTTCGGCTCCTG\n+TGTATGCTCACTATGCATGATTGTCTTGAGATCTAACGATCCTGAAGGTAGTCTTGTCCG\n+TA
AACGAAAAAACCGCCCAGCAAGGCGGCTTTTCGAAGGTTCTCAGAGCAGCAACTCTTT\n+GAACCAAGGTAACTGGCTTGGAGGAGCACAGCCACCAAATCTGTCCTTTCAGTTTAGCCT\n+TAACCGGCGCATAACTTCAAGACTAACTCCTCTAAACGGTTACCAATGGCTGCTGCCAGT\n+GGCGTTTTTACGTGTCTTTCCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAG\n+CGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACC\n+GGGCCGAGATACCAACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAG\n+GCGGACAGGTATCCGGTAAGCGGCAGGG\n+>SRR6126859_38 length_1598_cov_94.5224(2 copies)\n+TCCATGAGCGCCACCTACGGCCACCCGGCCACCGAAGCGCTGGTGGCGACGTTGGCAGGT\n+ACTGAGCATGACACCGGGCTGGATATCCTGAAGCTGGAAAATATCGCCGCGTACTTCCGC\n+GAGGTGCGCAAAAAGTACCACGCCTTTGAAGGCCAGCTGAAAGGCTACGACAGCCGCATC\n+CTGGTGGCGCAGGTGCCGGGCGGAATGCTCACTAACCTCGAAAGCCAGCTGAAGCAGCAG\n+AACGCGGCGGACAAACTCGACCAGGTGCTGGCGGAAATCCCCCGCGTGCGCGAGGACCTC\n+GGCTTTATTCCGCTGGTGACCCCCACCTCACAGATTGTCGGCACCCAGGCGGTGCTCAAC\n+GTGCTGACGGGCGAACGCTACAAGACCATTGCCAAAGAAACGGCGGGCATTCTGAAAGGC\n+GAATACGGCCACACCCCGGTGCCGGTGAACGCCGCGTTACAGGCCCGCGTGCTGGAAGGA\n+GGCGCTCCGGTGACCTGCCGCCCGGCAGACCTGCTGAAGCCGGAACTGGCTGAACTGGAA\n+GCGGACGTCAGGCGCCAGGCGCAGGAGAAGGGGATTCAGCTTGCGGGAAACGCCATCGAC\n+GACGTGCTCACCGTGGCGCTGTTCCCGCAAATCGGCCTCAAATTCCTCGAAAACCGCCAC\n+AACCCGGCGGCGTTTGAGCCACTGCCGCAGGCGGAAGCCGCGCAGCCGGTGACAAAAGCA\n+GAGAAGCCTGCCGCTTCCGGTATCTACACCGTGGAAGTGGAAGGCAAAGCCTTTGTGGTG\n+AAGGTCAGCGACGGCGGCGATATCAGCCAGCTCACTGCGGCTGCACCTGCTGCCTCTTCT\n+GCTCCTGCCACCGCCCCGGCAGGCGCCGGCACCCCGGTCACCGCGCCGCTGGCGGGCAAC\n+ATCTGGAAGGTGATTGCCACCGAAGGCCAGACAGTGGCCGAAGGCGATGTGCTGCTGATT\n+CTGGAAGCCATGAAGATGGAAACCGAAATCCGCGCCGCGCAGGCCGGGACGGTACGCGGT\n+ATCGCGGTGAAGTCCGGGGACGCGGTTTCCGTGGGCGACACCCTGATGACGCTGGCGTAA\n+CGGAGAACTGAAATGGAAAGTCTGAACGCCCTGCTTCAGGGCATGGGGCTGATGCACCTT\n+GGCGCAGGCCAGGCCATCATGCTGCTGGTCAGCCTGCTGCTGCTGTGGCTGGCGATTGCG\n+AAAAAGTTCGAACCGTTACTGCTGCTGCCGATTGGCTTCGGCGGCCTGCTCTCCAACATC\n+CCGGAAGCGGGTATGGCGCTGACCGCGCTGGAGAGCCTGCTGGCGCATCACGACGCCGGG\n+CAGCTGGCGGTGATTGCCGCGAAGCTTAACTGCGCGCCGGACGTGCACGCCATTAAAGAG\n+GCATTAGCGCTGGCGCTGCCGTCGGTGCAGAGCCAGATGGAGAACCTGGCGGTGGACATG\n+GGCTACACGCCGGGGGTGCTGGCGCTGTTC
TATAAAGTGGCGATTGGCTCCGGCGTCGCG\n+CCGCTGGTCATCTTCATGGGCGTCGGCGCGATGACCGACTTCGGCCCGCTGCTGGCCAAC\n+CCGCGCACCCTGCTGCTGGGGGCGGCGGCGCAGTTCGG\n'
b
diff -r 000000000000 -r 90957420cc07 test-data/Results_summary.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Results_summary.txt Thu Oct 12 15:04:45 2017 -0400
b
b'@@ -0,0 +1,228 @@\n+Filter SPAdes repeats Results Summary\n+======================================\n+\n+Paramaters used:\n+Length cutoff for calcularing average cutoff: 5000\n+Coverage cutoff ratio: 0.33\n+Repeat cutoff ratio: 1.75\n+Length cutoff: 1000\n+\n+Calculations:\n+Average coverage: 39.0276794117647\n+Coverage cutoff: 12.8791342058824\n+Repeat cutoff: 68.2984389705882\n+\n+File headers:\n+Filtered sequences (with repeats):\n+>SRR6126859_1 length_852517_cov_27.8446\n+>SRR6126859_2 length_405903_cov_39.4874\n+>SRR6126859_3 length_294698_cov_37.1403\n+>SRR6126859_4 length_269266_cov_28.5399\n+>SRR6126859_5 length_264367_cov_32.1759\n+>SRR6126859_6 length_229146_cov_33.6442\n+>SRR6126859_7 length_228458_cov_34.4838\n+>SRR6126859_8 length_221034_cov_34.7177\n+>SRR6126859_9 length_189827_cov_33.7581\n+>SRR6126859_10 length_183437_cov_32.1863\n+>SRR6126859_11 length_162443_cov_33.7668\n+>SRR6126859_12 length_153208_cov_40.7852\n+>SRR6126859_13 length_151655_cov_31.6292\n+>SRR6126859_14 length_135647_cov_38.4498\n+>SRR6126859_15 length_120775_cov_32.1216\n+>SRR6126859_16 length_106152_cov_35.3715\n+>SRR6126859_17 length_77075_cov_40.8552\n+>SRR6126859_18 length_64552_cov_32.9705\n+>SRR6126859_19 length_60038_cov_41.8947\n+>SRR6126859_20 length_57173_cov_21.4966\n+>SRR6126859_21 length_56928_cov_34.5009\n+>SRR6126859_22 length_55686_cov_38.1046\n+>SRR6126859_23 length_53048_cov_38.3551\n+>SRR6126859_24 length_49979_cov_31.737\n+>SRR6126859_25 length_49347_cov_32.9659\n+>SRR6126859_26 length_44649_cov_39.2504\n+>SRR6126859_27 length_36027_cov_38.5995\n+>SRR6126859_28 length_32857_cov_33.8707\n+>SRR6126859_29 length_26264_cov_32.2501\n+>SRR6126859_30 length_10716_cov_31.2279\n+>SRR6126859_32 length_6213_cov_64.7694\n+>SRR6126859_33 length_5280_cov_19.9357\n+>SRR6126859_34 length_5115_cov_198.272(5 copies)\n+>SRR6126859_35 length_4648_cov_102.186(2 copies)\n+>SRR6126859_36 length_4417_cov_33.2696\n+>SRR6126859_37 length_2182_cov_50.1815\n+>SRR6126859_38 
length_1598_cov_94.5224(2 copies)\n+>SRR6126859_39 length_1043_cov_65.8816\n+\n+Filtered sequences (no repeats):\n+>SRR6126859_1 length_852517_cov_27.8446\n+>SRR6126859_2 length_405903_cov_39.4874\n+>SRR6126859_3 length_294698_cov_37.1403\n+>SRR6126859_4 length_269266_cov_28.5399\n+>SRR6126859_5 length_264367_cov_32.1759\n+>SRR6126859_6 length_229146_cov_33.6442\n+>SRR6126859_7 length_228458_cov_34.4838\n+>SRR6126859_8 length_221034_cov_34.7177\n+>SRR6126859_9 length_189827_cov_33.7581\n+>SRR6126859_10 length_183437_cov_32.1863\n+>SRR6126859_11 length_162443_cov_33.7668\n+>SRR6126859_12 length_153208_cov_40.7852\n+>SRR6126859_13 length_151655_cov_31.6292\n+>SRR6126859_14 length_135647_cov_38.4498\n+>SRR6126859_15 length_120775_cov_32.1216\n+>SRR6126859_16 length_106152_cov_35.3715\n+>SRR6126859_17 length_77075_cov_40.8552\n+>SRR6126859_18 length_64552_cov_32.9705\n+>SRR6126859_19 length_60038_cov_41.8947\n+>SRR6126859_20 length_57173_cov_21.4966\n+>SRR6126859_21 length_56928_cov_34.5009\n+>SRR6126859_22 length_55686_cov_38.1046\n+>SRR6126859_23 length_53048_cov_38.3551\n+>SRR6126859_24 length_49979_cov_31.737\n+>SRR6126859_25 length_49347_cov_32.9659\n+>SRR6126859_26 length_44649_cov_39.2504\n+>SRR6126859_27 length_36027_cov_38.5995\n+>SRR6126859_28 length_32857_cov_33.8707\n+>SRR6126859_29 length_26264_cov_32.2501\n+>SRR6126859_30 length_10716_cov_31.2279\n+>SRR6126859_32 length_6213_cov_64.7694\n+>SRR6126859_33 length_5280_cov_19.9357\n+>SRR6126859_36 length_4417_cov_33.2696\n+>SRR6126859_37 length_2182_cov_50.1815\n+>SRR6126859_39 length_1043_cov_65.8816\n+\n+Repeat sequences:\n+>SRR6126859_34 length_5115_cov_198.272(5 copies)\n+>SRR6126859_35 length_4648_cov_102.186(2 copies)\n+>SRR6126859_38 length_1598_cov_94.5224(2 copies)\n+\n+Discarded sequences:\n+>SRR6126859_31 length_7815_cov_9.7826\n+>SRR6126859_40 length_966_cov_206.632\n+>SRR6126859_41 length_667_cov_1.27941\n+>SRR6126859_42 length_665_cov_1.22951\n+>SRR6126859_43 
length_602_cov_1.38757\n+>SRR6126859_44 length_574_cov_2.62428\n+>SRR6126859_45 length_573_cov_1.12934\n+>SRR6126859_46 length_519_cov_1.5625\n+>SRR6126859_47'..b'756\n+>SRR6126859_66 length_416_cov_1.07756\n+>SRR6126859_67 length_415_cov_1.08333\n+>SRR6126859_68 length_411_cov_1.64326\n+>SRR6126859_69 length_411_cov_1.09551\n+>SRR6126859_70 length_410_cov_1.09014\n+>SRR6126859_71 length_410_cov_1.04225\n+>SRR6126859_72 length_409_cov_0.977401\n+>SRR6126859_73 length_407_cov_1.03693\n+>SRR6126859_74 length_406_cov_1.11111\n+>SRR6126859_75 length_406_cov_1.10826\n+>SRR6126859_76 length_404_cov_1.0659\n+>SRR6126859_77 length_400_cov_1.12464\n+>SRR6126859_78 length_399_cov_0.918605\n+>SRR6126859_79 length_398_cov_1.13703\n+>SRR6126859_80 length_395_cov_0.979412\n+>SRR6126859_81 length_392_cov_1.09199\n+>SRR6126859_82 length_392_cov_1.04154\n+>SRR6126859_83 length_391_cov_1.16071\n+>SRR6126859_84 length_391_cov_1.14286\n+>SRR6126859_85 length_390_cov_1.0806\n+>SRR6126859_86 length_389_cov_1.16168\n+>SRR6126859_87 length_386_cov_1.17523\n+>SRR6126859_88 length_385_cov_1.18182\n+>SRR6126859_89 length_383_cov_1.1311\n+>SRR6126859_90 length_383_cov_1.01829\n+>SRR6126859_91 length_379_cov_1.20062\n+>SRR6126859_92 length_379_cov_1.2037\n+>SRR6126859_93 length_375_cov_1.2125\n+>SRR6126859_94 length_375_cov_0.95625\n+>SRR6126859_95 length_371_cov_1.23101\n+>SRR6126859_96 length_368_cov_1.17252\n+>SRR6126859_97 length_366_cov_1.2508\n+>SRR6126859_98 length_365_cov_1.25484\n+>SRR6126859_99 length_362_cov_1.26384\n+>SRR6126859_100 length_357_cov_1.25828\n+>SRR6126859_101 length_357_cov_1.28808\n+>SRR6126859_102 length_354_cov_1.30435\n+>SRR6126859_103 length_354_cov_39.5084\n+>SRR6126859_104 length_349_cov_1.31633\n+>SRR6126859_105 length_348_cov_1.33106\n+>SRR6126859_106 length_348_cov_1.06485\n+>SRR6126859_107 length_348_cov_0.849829\n+>SRR6126859_108 length_346_cov_1.20619\n+>SRR6126859_109 length_344_cov_1.34256\n+>SRR6126859_110 length_344_cov_1.29758\n+>SRR6126859_111 
length_344_cov_1.31142\n+>SRR6126859_112 length_342_cov_1.35889\n+>SRR6126859_113 length_342_cov_1.19164\n+>SRR6126859_114 length_342_cov_0.923345\n+>SRR6126859_115 length_342_cov_31.9024\n+>SRR6126859_116 length_341_cov_1.36364\n+>SRR6126859_117 length_341_cov_1.36364\n+>SRR6126859_118 length_341_cov_1.36014\n+>SRR6126859_119 length_340_cov_1.32632\n+>SRR6126859_120 length_332_cov_1.27437\n+>SRR6126859_121 length_330_cov_1.41818\n+>SRR6126859_122 length_327_cov_1.43382\n+>SRR6126859_123 length_327_cov_1.34926\n+>SRR6126859_124 length_324_cov_1.44981\n+>SRR6126859_125 length_324_cov_1.44981\n+>SRR6126859_126 length_321_cov_1.46241\n+>SRR6126859_127 length_321_cov_55.8346\n+>SRR6126859_128 length_321_cov_1.46241\n+>SRR6126859_129 length_321_cov_1.46241\n+>SRR6126859_130 length_321_cov_1.21805\n+>SRR6126859_131 length_316_cov_1.49042\n+>SRR6126859_132 length_315_cov_1.49615\n+>SRR6126859_133 length_314_cov_1.37452\n+>SRR6126859_134 length_307_cov_1.54365\n+>SRR6126859_135 length_307_cov_2.25\n+>SRR6126859_136 length_306_cov_1.17131\n+>SRR6126859_137 length_273_cov_33.555\n+>SRR6126859_138 length_261_cov_25.1408\n+>SRR6126859_139 length_246_cov_73.3194\n+>SRR6126859_140 length_242_cov_34.2941\n+>SRR6126859_141 length_241_cov_84.7527\n+>SRR6126859_142 length_232_cov_26.5876\n+>SRR6126859_143 length_225_cov_41.1235\n+>SRR6126859_144 length_170_cov_188.53\n+>SRR6126859_145 length_123_cov_63.4853\n+>SRR6126859_146 length_117_cov_29.2742\n+>SRR6126859_147 length_112_cov_2.45614\n+>SRR6126859_148 length_100_cov_163.6\n+>SRR6126859_149 length_99_cov_124.886\n+>SRR6126859_150 length_88_cov_1.72727\n+>SRR6126859_151 length_87_cov_112.25\n+>SRR6126859_152 length_80_cov_191\n+>SRR6126859_153 length_79_cov_61.4583\n+>SRR6126859_154 length_79_cov_146.917\n+>SRR6126859_155 length_78_cov_233.957\n+>SRR6126859_156 length_77_cov_146.636\n+>SRR6126859_157 length_77_cov_101.818\n+>SRR6126859_158 length_71_cov_47.75\n+>SRR6126859_159 length_68_cov_268.077\n+>SRR6126859_160 
length_66_cov_48.3636\n+>SRR6126859_161 length_66_cov_93.9091\n+>SRR6126859_162 length_66_cov_118.909\n+>SRR6126859_163 length_59_cov_247.5\n+>SRR6126859_164 length_56_cov_222\n+>SRR6126859_165 length_56_cov_35\n+>SRR6126859_166 length_56_cov_194\n+>SRR6126859_167 length_56_cov_801\n+\n'
b
diff -r 000000000000 -r 90957420cc07 test-data/SPAdes_scaffold_stats.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SPAdes_scaffold_stats.tabular Thu Oct 12 15:04:45 2017 -0400
b
@@ -0,0 +1,168 @@
+#name length coverage
+SRR6126859_1 852517 27.8446
+SRR6126859_2 405903 39.4874
+SRR6126859_3 294698 37.1403
+SRR6126859_4 269266 28.5399
+SRR6126859_5 264367 32.1759
+SRR6126859_6 229146 33.6442
+SRR6126859_7 228458 34.4838
+SRR6126859_8 221034 34.7177
+SRR6126859_9 189827 33.7581
+SRR6126859_10 183437 32.1863
+SRR6126859_11 162443 33.7668
+SRR6126859_12 153208 40.7852
+SRR6126859_13 151655 31.6292
+SRR6126859_14 135647 38.4498
+SRR6126859_15 120775 32.1216
+SRR6126859_16 106152 35.3715
+SRR6126859_17 77075 40.8552
+SRR6126859_18 64552 32.9705
+SRR6126859_19 60038 41.8947
+SRR6126859_20 57173 21.4966
+SRR6126859_21 56928 34.5009
+SRR6126859_22 55686 38.1046
+SRR6126859_23 53048 38.3551
+SRR6126859_24 49979 31.737
+SRR6126859_25 49347 32.9659
+SRR6126859_26 44649 39.2504
+SRR6126859_27 36027 38.5995
+SRR6126859_28 32857 33.8707
+SRR6126859_29 26264 32.2501
+SRR6126859_30 10716 31.2279
+SRR6126859_31 7815 9.7826
+SRR6126859_32 6213 64.7694
+SRR6126859_33 5280 19.9357
+SRR6126859_34 5115 198.272
+SRR6126859_35 4648 102.186
+SRR6126859_36 4417 33.2696
+SRR6126859_37 2182 50.1815
+SRR6126859_38 1598 94.5224
+SRR6126859_39 1043 65.8816
+SRR6126859_40 966 206.632
+SRR6126859_41 667 1.27941
+SRR6126859_42 665 1.22951
+SRR6126859_43 602 1.38757
+SRR6126859_44 574 2.62428
+SRR6126859_45 573 1.12934
+SRR6126859_46 519 1.5625
+SRR6126859_47 512 1.28009
+SRR6126859_48 502 1.30649
+SRR6126859_49 491 1.52294
+SRR6126859_50 487 1.23611
+SRR6126859_51 474 1.86158
+SRR6126859_52 466 0.944039
+SRR6126859_53 462 0.958231
+SRR6126859_54 454 75.198
+SRR6126859_55 453 0.974874
+SRR6126859_56 450 0.98481
+SRR6126859_57 444 1.49357
+SRR6126859_58 443 1
+SRR6126859_59 441 0.914508
+SRR6126859_60 437 1.01832
+SRR6126859_61 435 1.02632
+SRR6126859_62 434 1.02639
+SRR6126859_63 428 1.0429
+SRR6126859_64 426 40.0108
+SRR6126859_65 416 1.07756
+SRR6126859_66 416 1.07756
+SRR6126859_67 415 1.08333
+SRR6126859_68 411 1.64326
+SRR6126859_69 411 1.09551
+SRR6126859_70 410 1.09014
+SRR6126859_71 410 1.04225
+SRR6126859_72 409 0.977401
+SRR6126859_73 407 1.03693
+SRR6126859_74 406 1.11111
+SRR6126859_75 406 1.10826
+SRR6126859_76 404 1.0659
+SRR6126859_77 400 1.12464
+SRR6126859_78 399 0.918605
+SRR6126859_79 398 1.13703
+SRR6126859_80 395 0.979412
+SRR6126859_81 392 1.09199
+SRR6126859_82 392 1.04154
+SRR6126859_83 391 1.16071
+SRR6126859_84 391 1.14286
+SRR6126859_85 390 1.0806
+SRR6126859_86 389 1.16168
+SRR6126859_87 386 1.17523
+SRR6126859_88 385 1.18182
+SRR6126859_89 383 1.1311
+SRR6126859_90 383 1.01829
+SRR6126859_91 379 1.20062
+SRR6126859_92 379 1.2037
+SRR6126859_93 375 1.2125
+SRR6126859_94 375 0.95625
+SRR6126859_95 371 1.23101
+SRR6126859_96 368 1.17252
+SRR6126859_97 366 1.2508
+SRR6126859_98 365 1.25484
+SRR6126859_99 362 1.26384
+SRR6126859_100 357 1.25828
+SRR6126859_101 357 1.28808
+SRR6126859_102 354 1.30435
+SRR6126859_103 354 39.5084
+SRR6126859_104 349 1.31633
+SRR6126859_105 348 1.33106
+SRR6126859_106 348 1.06485
+SRR6126859_107 348 0.849829
+SRR6126859_108 346 1.20619
+SRR6126859_109 344 1.34256
+SRR6126859_110 344 1.29758
+SRR6126859_111 344 1.31142
+SRR6126859_112 342 1.35889
+SRR6126859_113 342 1.19164
+SRR6126859_114 342 0.923345
+SRR6126859_115 342 31.9024
+SRR6126859_116 341 1.36364
+SRR6126859_117 341 1.36364
+SRR6126859_118 341 1.36014
+SRR6126859_119 340 1.32632
+SRR6126859_120 332 1.27437
+SRR6126859_121 330 1.41818
+SRR6126859_122 327 1.43382
+SRR6126859_123 327 1.34926
+SRR6126859_124 324 1.44981
+SRR6126859_125 324 1.44981
+SRR6126859_126 321 1.46241
+SRR6126859_127 321 55.8346
+SRR6126859_128 321 1.46241
+SRR6126859_129 321 1.46241
+SRR6126859_130 321 1.21805
+SRR6126859_131 316 1.49042
+SRR6126859_132 315 1.49615
+SRR6126859_133 314 1.37452
+SRR6126859_134 307 1.54365
+SRR6126859_135 307 2.25
+SRR6126859_136 306 1.17131
+SRR6126859_137 273 33.555
+SRR6126859_138 261 25.1408
+SRR6126859_139 246 73.3194
+SRR6126859_140 242 34.2941
+SRR6126859_141 241 84.7527
+SRR6126859_142 232 26.5876
+SRR6126859_143 225 41.1235
+SRR6126859_144 170 188.53
+SRR6126859_145 123 63.4853
+SRR6126859_146 117 29.2742
+SRR6126859_147 112 2.45614
+SRR6126859_148 100 163.6
+SRR6126859_149 99 124.886
+SRR6126859_150 88 1.72727
+SRR6126859_151 87 112.25
+SRR6126859_152 80 191
+SRR6126859_153 79 61.4583
+SRR6126859_154 79 146.917
+SRR6126859_155 78 233.957
+SRR6126859_156 77 146.636
+SRR6126859_157 77 101.818
+SRR6126859_158 71 47.75
+SRR6126859_159 68 268.077
+SRR6126859_160 66 48.3636
+SRR6126859_161 66 93.9091
+SRR6126859_162 66 118.909
+SRR6126859_163 59 247.5
+SRR6126859_164 56 222
+SRR6126859_165 56 35
+SRR6126859_166 56 194
+SRR6126859_167 56 801
b
diff -r 000000000000 -r 90957420cc07 test-data/SPAdes_scaffolds_(fasta).fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SPAdes_scaffolds_(fasta).fasta Thu Oct 12 15:04:45 2017 -0400
b
b'@@ -0,0 +1,78979 @@\n+>SRR6126859_1 length_852517_cov_27.8446\n+GAGCACGACGTTGCCAAGGTCGGGGTCGCGAGTTCGAGTCTCGTTTCCCGCTCCAGTTTA\n+AAAGCATTGGCGTAAAGCGGATGTTGGCTGAAAAGCCAAAAAATTTGGCGCGTTAACAAA\n+GCGGTTATGTAGCGGATTGCAAATCCGTCTAGTCCGGTTCGACTCCGGAACGCGCCTCCA\n+ATTTTTCCCGAGCCCGGATGGTGGAATCGGTAGACACAAGGGATTTAAAATCCCTCGGCG\n+TTCGCGCTGTGCGGGTTCAAGTCCCGCTCCGGGTACCATGGGAAAGACAAGAATAATCAA\n+AGCAATAAGCAGTGTCGTGAAACCACCTACGGGTGGTTTTTTTGTGCCTTCAATTTGTAT\n+GCCGACCGCGAGCGTGAACTCTCCAGTGCGATACACAGTAAAAAACACCGGTAGCGGCAT\n+TTCTGCCAGTACAACAGTTATGTGCTACCGACCCGCAAGAACTTATGCCCGGTAAATACT\n+AAACGTCGGGTAAATCAGCCGATAAAATCCATTAATCTGTATAAAAATGCTGATATGTTT\n+TATGCCTGCTTTATTAAAGAGAGTAGGTCTGCCAGTGAAATATACCGATAATGCAAAATT\n+ATTTATTAGCTTCATTAATATTGGGATTGTAGGTTGCGCTTTTTTACTTTTTTTATTTTG\n+TATGAGCCTGAGAGTGCAATCGTAACCTACTCTCTTCCAGGAGAGCCTTCCTCTTCAACA\n+TGTGATGCGCGTAATTTTATTGGCTACGTCGATGAAAACGACTTTGCAGAGTATATAAAA\n+AAAGTAGAACTGACGCGCTACAGTATGTATTGTCTGAAAAAAACAGGGGCGGGTCAGTGG\n+AGTATTTATGCAAATTAAACGAATATATAACTGTTCAGGGGTGTTTTCTGCACAATTTAA\n+TTAAAGGTAGATAGCGAACTAATATTCATCATAAGGTAATGTGATCCGCATCATGCTTTA\n+ATCACGTTTTGCAATATTGTCTTAATAACAGGTCAACTACAGATAATTTCGGGTTTTCGT\n+GAATGGCGGTTTTGGTCAGTCTCTATATTATGCTTTCCCCGCTTTACGGAGATGATGATG\n+CAAAAGAAAACCTGGGCCGTTTTATTGATACCATTGTGCCTCACCGCTTGCCACCAGACG\n+ACCTACCGATCAACAGCCAAAGATGAAATTCTCACAGGGAGCTATATTGACCCGACCAAA\n+ACACGCTTTCCGTTAGCGGATTATTCACAGTCGGTTGATAAATGGATACCTCCGGACTCG\n+GCTGATTACACTATTCCGGTTATTGATAGTGCTACCCAACAACGCTACTTTAACGCACTA\n+AAATCTCATTATTTCGGAATGGACAGTGAAGCGCACTCGCCGTGGAATGGTTTTTATATT\n+ACCGCATTGCTGAAAAAAAACGCAGCACAGGCCCGGGATGCCAGTATTAAGCAGTTTCTT\n+AGCGACGGCAGTGCTTATTGGGGGGAAAACTTCAGACTTTATACCTCTCGCTGGAAAGAG\n+GAGGTTAGAGGGAATACGGATACGCAGATTGACAATATCTATAATGCCTCCCGGCGAGGC\n+ATTATGGTCAGAGAAAGTTTAGTCAGAGCGCTGCCAACGGACGACCCGCTCTTTAACGAT\n+CCCCGTCAGGCGGGCGAGGGGTATCCGTTTGATAATTTACAAATGTCTTCGCTGCGTCCC\n+GGTACGCCGGTCTATACGCTGACGAAGAGCAAAGACCAACGCTGGCAATATGTCGTTTCA\n+CCAGCGGTAACTGGCTGGGTTCATAGTGAAGATATTGCCAGTACGGATCAGAAATTTATA\n+ACACAGTGGGTTTTGCTCGCTCACAAGCAACTGGGGGCATTTAT
CAACGCGCCGGTTTCT\n+GTCCATGCCGCAGGCGTCTATTATTTCACCGGGCGGCCGGGCACTATTTTACCGTTCCGG\n+CATCAACGGGCGGGCCAGTTCCTCATTGCGGCGCCAGTTCGCGACAGCAACGGTCGCGCG\n+TTTATCCATTGGGTCTGGCTAAGCGGTAACGAGTTTACGGCTATGCCGTGGAAAATGACG\n+CCGGAAAATATCGCCGTGCTAATGAAAGCAATGCACGGCGCCCCCTACGGTTGGGGAAAT\n+TTTAACTTTTATAATGATTGTTCAGCCGAAGTCCGCAGCCTGTTAATGCCGTTTGGCATA\n+TTCCTGCCCAGACATTCATCCGCGCAGGTAGAGGCGGCTGGACGGGTGGTCGATCTTAGT\n+CATAAAAATCCCCAAATGCGGATCGATTATCTCACCAGATACGGAAAGCCGTTTACTACG\n+CTGGTCTATATTCCAGGGCATATTATGCTGTATATCGGTAACACAACCATGAATGGGCAG\n+GTTATGCCGATGACTTATCAGAATATCTGGGGATTGCGCCCAAACCATGCTAATAGCCGG\n+AGCATTATCGGTGAGGCAGTATTTTTGCCGTTACTGCGTTTTTATCCTGAAAATCCTGAG\n+CTGATATCGCTGGCTGGTAAGGTTCTGTTTAAGCTTGGTTATATAGAATAGGTTGACAAA\n+CTGTGTTATCGCGGAAGATATGATTAAGGCAACCGTGCGCGTGGATATAAAGAGAGTATC\n+CCTGTAAACTGTGTCTGAAAAAAGTATGCGCTATAGTGCGGGCTGTTTATTTGAAATACT\n+ATCTGGAGCTGATTCTGGATGTGATTATGTGCCTCACACATTGAAGTGAGAGTTATTGCT\n+GGTGGTATCTGGCTGGAGGCTGGGTGGTAAAAAGACGTAATTTATTGCATTTTAATAACG\n+GTGCCCGATCAGAGCGCGATCTTGGGGATCTGGTCACTAAAGTTTTTGAGAAAGCGGCGA\n+AAAAAGAACCGCAACCGCTATACACATTTAGCCTGCCGTTGCTGAGTGTGCAGGATGAGA\n+TCCGTGTTTACTGTAAGAAAAAAAATATCAAAATAGGGTACGATACGCTGTTTATGGAAA\n+TAACCTTTTCTGCTGACAGAGAAGCGGTAGACGAACTCATTAAACACTTTTTTACTGAAA\n+ATAAGCTCTATTTGAGAGGGCGGTTTTATCTGTCCGCGGCGGTGGTATAAGTTAGCATGA\n+GTATAATATAGCGAGAAACCCATCTCTCGCTTTTGTGTGGGGCGAGCGTTTCCTGGCGGA\n+AAATCTGCGCCCTGATCCTCCTTTTGTGCGTTAACTTCCGGAATCCTCCTCATTTCGTGT\n+TAATCTGGCCCCTGATGGTTGCCATCCAAACTGCCTGCAAAAAAATTATTTTTATCCGCT\n+GCAATGTAAGAGGCCTACAGGCGGAAGTCATCACTGAGGAAAAGCGTTATGAAAATGGAA\n+CCATTAAACGAGAACGAGCTGGAATGGCTGGATGATGTGCTGACGAAATACAATACCGAT\n+CAGGCCATTTTGGATGTCGCGGAGCTGGACGGTCTGATTACTGCGGTACTAAGTTCTCCG\n+CGTCCCATCGAGCCGGAACAGTGGCTGGTCGCGATATGGGGGGGACCCGCGTACGTACCG\n+CGCTGGACATCTGAAAAAGAAATGACGCGATTTATGGATCTGGTGTTCCAGCACATGGCG\n+GACACAGCCGCCCGGCTTGAAGATTATCCAGAACAGTTCGAGCCGCTGTTTGGCCTGCGA\n+GAAGTTGATGGCCATGAATTAACGATTGTTGAAGAATGGTGCTTTGGTTACATGCGCGGA\n+GTGTCGCTGTCCGACTGGTCTGACTTGCCGGATACGCTAAAACCGGCGCTGGAAGCGATT\n+GCCTTACACGGCACTGAAGAAAACTTTG
CGCTGTTGGATAAAATGAGCCCGGAAGCGTTC\n+GATAAAAGCGTTGACGCTATCCGTATCGCGG'..b'TGAAGGATAATGTTGCTTTAGCAACGGCCCGAAGGGCG\n+AGGCAAAGCCGAGTCATCCTGCACGACCCACCATCCTGAATGATTGAAGCAGTAACCCTT\n+ATCCAAGGGGTCGTTAGCTCAGTTGGTAGAGCAGTTGACTTTTAATCAATTGGTCGCAGG\n+TT\n+>SRR6126859_141 length_241_cov_84.7527\n+GCAGTTGACTTTTAATCAATTGGTCGCAGGTTCGAATCCTGCACGACCCACCAATTTAAC\n+ATCAAACTCAGATGTTGAACGTGAAGGATAACGTTGCGTCAGCAACCTGTAGGGCGAGGC\n+GAAGCCGAGTCATCCTGCACGACCCACCACTAACTTCGGTTAGTCAGTAATATCCAGCGT\n+AGTATCGGGTGATTAGCTCAGCTGGGAGAGCACCTCCCTTACAAGGAGGGGGTCGGCGGT\n+T\n+>SRR6126859_142 length_232_cov_26.5876\n+GATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCAAATTTAGTG\n+TGCTGATATGGCTCAGTTGGTAGAGCGCACCCTTGGTAAGGGTGAGGTCCCCAGTTCGAC\n+TCTGGGTATCAGCACCACTTAATACGGTTAAAGTTCGGTATTTGAAAAAGAATTTGTCTG\n+GCGGCAGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAA\n+>SRR6126859_143 length_225_cov_41.1235\n+CCTTTGACGCCTGTATCCGCTTTCTCGGCGAAGACCCGTGGCTGCGCCTGCGCGAGCTTA\n+AAAAGGCCATGCCGAAGACTCCCCTGCAGATGCTGCTGCGCGGCCAGAACCTGCTCGGCT\n+ACCGCCATTACGCCGATGATGTGGTGGAACGCTTCGTTGAGCGGGCGGTGAAAAACGGCA\n+TGGACGTGTTCCGCGTCTTCGATGCCATGAACGACCCGCGCAATA\n+>SRR6126859_144 length_170_cov_188.53\n+TATCCCATTAGGGCTATTTTACTTGCCATTTTGGACCTGGGCAGTGCTCGCCAAAACGCG\n+TTAGCGTTTTGAACGCCGCTTGCGGCGGCCCGAAGGGCGAGCGTAGCGAGTCAAACCTCA\n+CGTACTACGTGTACGCTCCGGTTTTTGCGCGCTGTCCGTGTCCAAACTGG\n+>SRR6126859_145 length_123_cov_63.4853\n+TCCTGAAATTCAGGGTTGACTCTGAAAGAGGAAAGCGTAATATACGCCACCTCGCGACGG\n+TGAGCTGAAAGCCGCGTCGCACCTGCTCTTTAACAATTTATCAGACAATCTGTGTGGGCA\n+CTC\n+>SRR6126859_146 length_117_cov_29.2742\n+CGGTAGCGCCGCAGCCGCAGTATCAGCAGCCGCAGCAGCCGGTAGCGCCGCAGCCGCAGT\n+ATCAGCAGCCGCAACAGCCGGTAGCGCCGCAGCCGCAGTATCAGCAGCCGCAGCAGC\n+>SRR6126859_147 length_112_cov_2.45614\n+CGTGAGGAGCAAATGAGATGAAAACACTCTGCAGTGAGTCCCTGCCTGAGGGGAGGCCGT\n+GAGGAGCAAATGAGATGAAAACACTCTGCAGTGAGTCCCTGCCTGAGGGGAG\n+>SRR6126859_148 length_100_cov_163.6\n+TTGTTCGTGAGTCTCTCAAATTTTCGCAACACGATGATGAATCGTAAGAAACATCTTCGG\n+GTTGTGAGGTTAAGCGACTAAGCGTACACGGTGGATGCCC\n+>SRR6126859_149 
length_99_cov_124.886\n+CGATTCATCATCGTGTTGCGAAAATTTGAGAGACTCACGAACAACTTTCGTTGTTCTGTG\n+TTTCAATTTTCAGCTTGATCCAGATTTTTAAAGAGCAAA\n+>SRR6126859_150 length_88_cov_1.72727\n+GATATGACCCCGCCCGACGATAGCGGCCCCGACGATAGCGGCGATGACGATGTGACCCCG\n+CCCGACGATAGCGGCGATGACGATGTGA\n+>SRR6126859_151 length_87_cov_112.25\n+TGGTGCGAGGGGGGGGACTTGAACCCCCACGTCCGTAAGGACACTAACACCTGAAGCTAG\n+CGCGTCTACCAATTCCGCCACCTTCGC\n+>SRR6126859_152 length_80_cov_191\n+ATGGTGCATCCGGGAGGATTCGAACCTCCGACCGCTCGGTTCGTAGCCGAGTACTCTATC\n+CAGCTGAGCTACGGATGCAT\n+>SRR6126859_153 length_79_cov_61.4583\n+ATGGTGGCTACGACGGGATTCGAACCTGTGACCCCATCATTATGAGTGATGTGCTCTAAC\n+CAACTGAGCTACGTAGCCA\n+>SRR6126859_154 length_79_cov_146.917\n+GGGTGATTAGCTCAGCTGGGAGAGCACCTCCCTTACAAGGAGGGGGTCGGCGGTTCGATC\n+CCGTCATCACCCACCACTT\n+>SRR6126859_155 length_78_cov_233.957\n+GGGTCGTTAGCTCAGTTGGTAGAGCAGTTGACTTTTAATCAATTGGTCGCAGGTTCGAAT\n+CCTGCACGACCCACCAAT\n+>SRR6126859_156 length_77_cov_146.636\n+TGGAGCGGGAAACGAGACTCGAACTCGCGACCCCGACCTTGGCAAGGTCGTGCTCTACCA\n+ACTGAGCTATTCCCGCA\n+>SRR6126859_157 length_77_cov_101.818\n+TGGTTGCGGGGGCCGGATTTGAACCGACGACCTTCGGGTTATGAGCCCGACGAGCTACCA\n+GGCTGCTCCACCCCGCG\n+>SRR6126859_158 length_71_cov_47.75\n+GGCTGCGGCGCTACCGGCTGCTGCGGCTGCTGATACTGCGGCTGCGGCGCTACCGGCTGT\n+TGCGGCTGCTG\n+>SRR6126859_159 length_68_cov_268.077\n+CCTCACGTACTACGTGTACGCTCCGGTTTTTGCGCGCTGTCCGTGTCCAAACTGGCTGCG\n+CCAATAAC\n+>SRR6126859_160 length_66_cov_48.3636\n+GATGTTTCTTACGATTCATCATCGTGTTGCGAAAATTTGAGAGACTCACGAACAACTTTC\n+GTTGTT\n+>SRR6126859_161 length_66_cov_93.9091\n+AACAACGAAAGTTGTTCGTGAGTCTCTCAAATTTTCGCAACACGATGATGAATCGAAAGA\n+AACATC\n+>SRR6126859_162 length_66_cov_118.909\n+GATGTTTCTTACGATTCATCATCGTGTTGCGAAAATTTGAGAGACTCACGAACAATTTTC\n+GTTGTT\n+>SRR6126859_163 length_59_cov_247.5\n+AGGCGTTATTGGCGCAGCCAGTTTGGACACGGACAGCGCGCAAAAACCGGAGCGTACAC\n+>SRR6126859_164 length_56_cov_222\n+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+>SRR6126859_165 
length_56_cov_35\n+ATTTGCTCTTTAAAAATCTGGATCAAGCTGAAAATTGAAACACAGAACAACGAAAG\n+>SRR6126859_166 length_56_cov_194\n+TGGGTCGTTAGCTCAGTTGGTAGAGCAGTTGACTTTTAATCAATTGGTCGCAGGTT\n+>SRR6126859_167 length_56_cov_801\n+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\n'