Repository 'khmer_filter_abundance'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/khmer_filter_abundance

Changeset 0:71f98f6f68af (2015-11-11)
Next changeset 1:03246d4ea8cd (2015-11-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
added:
filter-abund.xml
filter-below-abund.py
macros.xml
test-data/normalize-by-median.c2.report.txt
test-data/normalize-by-median.paired.report.txt
test-data/normalize-by-median.report.txt
test-data/random-20-a.fa
test-data/random-20-a.fa.part
test-data/random-20-a.part.extract.fa
test-data/test-abund-read-2.fa
test-data/test-abund-read-2.fa.below
test-data/test-abund-read-2.large.oxlicg
test-data/test-abund-read-2.large.oxlicg.info
test-data/test-abund-read-2.oxlicg
test-data/test-abund-read-2.oxlicg.info
test-data/test-abund-read-paired.fa
tool_dependencies.xml
b
diff -r 000000000000 -r 71f98f6f68af filter-abund.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-abund.xml Wed Nov 11 09:45:40 2015 -0500
[
@@ -0,0 +1,80 @@
+<tool id="khmer_filter_abundance" name="Filter k-mer" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around khmer's filter-abund.py. The command line is built
+         from the inputs below plus a previously saved k-mer countgraph. -->
+    <description>
+        by minimal abundance
+    </description>
+    <macros>
+        <token name="@BINARY@">filter-abund.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <!-- Each input dataset is symlinked to a predictable name in the job directory,
+         then the binary runs inside output/ so that whatever it writes there is
+         collected by the discover_datasets rule of the output collection.
+         Dataset paths are single-quoted so the shell never word-splits or
+         glob-expands them. -->
+    <command><![CDATA[
+set -xu &&
+#for $num, $input in enumerate($inputs)
+    ln -s '${input}' filter-abund-sequence-${num} &&
+#end for
+mkdir output && cd output &&
+@BINARY@
+--cutoff=${cutoff}
+${variable_coverage}
+@THREADS@
+'${input_countgraph_filename}'
+../filter-abund-sequence*
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_sequences_filenames" />
+        <!-- The flag name in the help text matches the truevalue that is really
+             placed on the command line. -->
+        <param name="variable_coverage" type="boolean" checked="false" truevalue="--variable-coverage" falsevalue=""
+            label="Variable coverage"
+            help="Only trim when a sequence has high enough coverage; median abundance > 20 (--variable-coverage)" />
+        <param name="cutoff" type="integer" value="2" label="Cutoff"
+            help="Trim at k-mers below this abundance. (--cutoff)" />
+        <expand macro="input_countgraph_filename" />
+    </inputs>
+    <outputs>
+        <!-- One list element per file found in output/ after the job finishes. -->
+        <collection name="filter-abund-sequences" type="list">
+            <discover_datasets pattern="__name__" directory="output" />
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Default cutoff. -->
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename"
+                value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+            <output_collection name="filter-abund-sequences" type="list">
+                <element name="filter-abund-sequence-0.abundfilt">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Explicit cutoff of 1. -->
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename"
+                value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+            <param name="cutoff" value="1" />
+            <output_collection name="filter-abund-sequences" type="list">
+                <element name="filter-abund-sequence-0.abundfilt">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Trims fastq/fasta sequences at k-mers of a given abundance based on a provided k-mer countgraph
+
+If the input sequences are from RNAseq or metagenome sequencing then
+`--variable-coverage` should be used.
+
+@HELP_FOOTER@
+]]>
+    </help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="counting-citation" />
+    </citations>
+</tool>
b
diff -r 000000000000 -r 71f98f6f68af filter-below-abund.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-below-abund.py Wed Nov 11 09:45:40 2015 -0500
[
@@ -0,0 +1,87 @@
+#! /usr/bin/env python
+# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
+# Copyright (C) 2011-2015, Michigan State University.
+# Copyright (C) 2015, The Regents of the University of California.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#
+#     * Neither the name of the Michigan State University nor the names
+#       of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written
+#       permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Contact: khmer-project@idyll.org
+from __future__ import print_function
+import sys
+import os
+import khmer
+from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter
+
+WORKER_THREADS = 8
+GROUPSIZE = 100
+
+CUTOFF = 50
+
+###
+
+
+def main():
+    counting_ht = sys.argv[1]
+    infiles = sys.argv[2:]
+
+    print('file with ht: %s' % counting_ht)
+    print('-- settings:')
+    print('N THREADS', WORKER_THREADS)
+    print('--')
+
+    print('making hashtable')
+    ht = khmer.load_countgraph(counting_ht)
+    K = ht.ksize()
+
+    for infile in infiles:
+        print('filtering', infile)
+        outfile = os.path.basename(infile) + '.below'
+
+        outfp = open(outfile, 'w')
+
+        def process_fn(record, ht=ht):
+            name = record['name']
+            seq = record['sequence']
+            if 'N' in seq:
+                return None, None
+
+            trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)
+
+            if trim_at >= K:
+                return name, trim_seq
+
+            return None, None
+
+        tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS, GROUPSIZE)
+
+        tsp.start(verbose_fasta_iter(infile), outfp)
+
+if __name__ == '__main__':
+    main()
b
diff -r 000000000000 -r 71f98f6f68af macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Nov 11 09:45:40 2015 -0500
[
@@ -0,0 +1,148 @@
+<macros>
+    <!-- Shared tokens and XML snippets for the khmer Galaxy tool wrappers. -->
+
+    <!-- Also used as the required khmer package version and in the help footer URL. -->
+    <token name="@WRAPPER_VERSION@">2.0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@WRAPPER_VERSION@">khmer</requirement>
+        </requirements>
+    </xml>
+    <!-- Asks the wrapped binary for its version and keeps the second
+         space-separated field of the last line it prints. -->
+    <xml name="version">
+        <version_command><![CDATA[@BINARY@ --version 2>&1 | tail -n 1 | cut -d ' ' -f 2]]></version_command>
+    </xml>
+    <!-- Any exit code of 1 or above marks the job as failed. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" />
+        </stdio>
+    </xml>
+    <!-- Command-line fragment matching the "tableinputs" conditional below:
+         fixed ksize and n_tables in simple mode, user-supplied values otherwise. -->
+    <token name="@TABLEPARAMS@">
+<![CDATA[
+#if $parameters.type == "simple"
+  --ksize=20
+  --n_tables=4
+  --max-tablesize=$parameters.tablesize
+#else
+  --ksize=$parameters.ksize
+  --n_tables=$parameters.n_tables
+  --max-tablesize="$parameters.tablesize_specific"
+#end if
+]]>
+    </token>
+    <!-- The backslash keeps the dollar sign literal in the template, so the shell
+         expands GALAXY_SLOTS at run time and falls back to 4 when it is unset. -->
+    <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token>
+    <xml name="tableinputs">
+        <conditional name="parameters">
+            <param name="type" type="select" label="Advanced Parameters"
+                help="ksize, n_tables, a specific tablesize" >
+                <option value="simple" selected="true">Hide</option>
+                <option value="specific">Show</option>
+            </param>
+            <when value="simple">
+                <param name="tablesize" type="select" label="Sample Type" display="radio">
+                    <option value="1e9" selected="true">Microbial Genome</option>
+                    <option value="2e9">Animal Transcriptome</option>
+                    <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option>
+                    <option value="16e9">Large Animal Genome</option>
+                </param>
+            </when>
+            <when value="specific">
+                <param name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
+                <param name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
+                <param name="tablesize_specific" type="text" value="1000000.0"
+                    label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" />
+            </when>
+        </conditional>
+    </xml>
+    <!-- Input parameter snippets. -->
+    <xml name="input_sequences_filenames">
+        <param  name="inputs" multiple="true" type="data" format="fasta,fastq"
+            label="Sequences in FASTA or FASTQ format"
+            help="Put in order of precedence such as longest reads first." />
+    </xml>
+    <xml name="input_sequence_filename">
+        <param  name="input_sequence_filename" type="data" format="fasta,fastq"
+            label="Sequence in FASTA or FASTQ format" />
+    </xml>
+    <xml name="input_countgraph_filename">
+        <param  name="input_countgraph_filename" type="data" format="oxlicg"
+            label="the k-mer countgraph to query"
+            help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." />
+    </xml>
+    <!-- Output dataset snippets. -->
+    <xml name="abundance-histogram-output">
+        <data name="output_histogram_filename" format="txt"
+            label="${tool.name} k-mer abundance histogram. The
+                columns are: (1) k-mer abundance, (2) k-mer count, (3)
+                cumulative count, (4) fraction of total distinct k-mers." />
+    </xml>
+    <xml name="output_sequences">
+        <data name="output" format_source="inputs"
+            label="${tool.name} processed nucleotide sequence file">
+            <discover_datasets pattern="__designation_and_ext__" directory="output" visible="true"/>
+        </data>
+    </xml>
+    <xml name="output_sequences_single">
+        <data name="output" format_source="input_sequence_filename"
+            label="${tool.name} processed nucleotide sequence file" />
+    </xml>
+    <!-- Both booleans below are checked by default and emit nothing when checked;
+         unchecking one puts the corresponding "no-" flag on the command line. -->
+    <xml name="input_zero">
+        <param name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true"
+            help="Output zero count bins (--no-zero)" />
+    </xml>
+    <xml name="input_bigcount">
+        <param  name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount"
+            checked="true" help="Count k-mers past 255 occurrences (--no-bigcount)" />
+    </xml>
+    <token name="@HELP_FOOTER@"><![CDATA[
+(from the khmer project: http://khmer.readthedocs.org/en/v@WRAPPER_VERSION@/ )]]></token>
+    <!-- Citation snippets. BibTeX fields must be comma-separated. -->
+    <xml name="software-citation">
+        <citation type="bibtex">@article{khmer2015,
+     author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine
+  and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,
+  Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,
+  Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and
+  Garland, Phillip and Gluck, Jonathan and González, Iván and Guermond, Sarah
+  and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and
+  Hyer, Alex and Härpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David
+  and Lippi, Justin and Mansour, Tamer and McA'Nulty, Pamela and McDonald, Eric
+  and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. and Nanlohy,
+  Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory,
+  Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and
+  Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and
+  Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan,
+  Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and
+  Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather
+  L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and
+  Brown, C. Titus",
+     title = "The khmer software package: enabling efficient nucleotide
+  sequence analysis",
+     year = "2015",
+     month = "08",
+     publisher = "F1000",
+     url = "http://dx.doi.org/10.12688/f1000research.6924.1"
+  }</citation>
+    </xml>
+    <xml name="diginorm-citation">
+        <citation type="bibtex">@unpublished{diginorm,
+    author = "Brown, C Titus and Howe, Adina and Zhang, Qingpeng and Pyrkosz,
+Alexis B and Brom, Timothy H",
+    title = "A Reference-Free Algorithm for Computational Normalization of
+Shotgun Sequencing Data",
+    year = "2012",
+    eprint = "arXiv:1203.4802",
+    url = "http://arxiv.org/abs/1203.4802",
+}</citation>
+    </xml>
+    <xml name="graph-citation">
+        <citation type="bibtex">@article{Pell2012,
+  doi = {10.1073/pnas.1121464109},
+  url = {http://dx.doi.org/10.1073/pnas.1121464109},
+  year  = {2012},
+  month = {jul},
+  publisher = {Proceedings of the National Academy of Sciences},
+  volume = {109},
+  number = {33},
+  pages = {13272--13277},
+  author = {J. Pell and A. Hintze and R. Canino-Koning and A. Howe and J. M. Tiedje and C. T. Brown},
+  title = {Scaling metagenome sequence assembly with probabilistic de Bruijn graphs},
+  journal = {Proceedings of the National Academy of Sciences}
+  }</citation>
+    </xml>
+    <xml name="counting-citation">
+        <citation type="doi">10.1371/journal.pone.0101271</citation>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 71f98f6f68af test-data/normalize-by-median.c2.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalize-by-median.c2.report.txt Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,2 @@
+total,kept,f_kept
+1001,2,0.001998
b
diff -r 000000000000 -r 71f98f6f68af test-data/normalize-by-median.paired.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalize-by-median.paired.report.txt Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,2 @@
+total,kept,f_kept
+6,2,0.3333
b
diff -r 000000000000 -r 71f98f6f68af test-data/normalize-by-median.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalize-by-median.report.txt Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,2 @@
+total,kept,f_kept
+1001,1,0.000999
b
diff -r 000000000000 -r 71f98f6f68af test-data/random-20-a.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.fa Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,198 @@
+>35
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
b
diff -r 000000000000 -r 71f98f6f68af test-data/random-20-a.fa.part
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.fa.part Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,198 @@
+>35 2
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16 2
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46 2
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40 2
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33 2
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98 2
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17 2
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89 2
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30 2
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82 2
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60 2
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83 2
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12 2
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85 2
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2 2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45 2
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11 2
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39 2
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26 2
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75 2
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81 2
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97 2
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13 2
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92 2
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56 2
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61 2
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96 2
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31 2
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29 2
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54 2
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0 2
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90 2
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34 2
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43 2
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8 2
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37 2
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51 2
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32 2
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78 2
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18 2
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36 2
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53 2
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24 2
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7 2
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9 2
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47 2
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62 2
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79 2
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48 2
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66 2
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25 2
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5 2
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72 2
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76 2
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69 2
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87 2
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27 2
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77 2
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95 2
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63 2
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38 2
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20 2
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88 2
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49 2
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91 2
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86 2
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42 2
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70 2
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19 2
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84 2
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52 2
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71 2
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93 2
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58 2
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22 2
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50 2
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21 2
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73 2
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68 2
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23 2
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94 2
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10 2
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41 2
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80 2
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64 2
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57 2
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1 2
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55 2
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67 2
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14 2
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15 2
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59 2
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28 2
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74 2
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4 2
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65 2
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6 2
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44 2
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3 2
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
b
diff -r 000000000000 -r 71f98f6f68af test-data/random-20-a.part.extract.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.part.extract.fa Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,198 @@
+>35 2
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16 2
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46 2
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40 2
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33 2
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98 2
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17 2
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89 2
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30 2
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82 2
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60 2
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83 2
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12 2
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85 2
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2 2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45 2
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11 2
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39 2
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26 2
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75 2
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81 2
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97 2
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13 2
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92 2
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56 2
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61 2
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96 2
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31 2
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29 2
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54 2
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0 2
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90 2
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34 2
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43 2
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8 2
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37 2
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51 2
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32 2
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78 2
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18 2
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36 2
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53 2
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24 2
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7 2
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9 2
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47 2
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62 2
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79 2
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48 2
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66 2
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25 2
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5 2
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72 2
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76 2
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69 2
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87 2
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27 2
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77 2
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95 2
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63 2
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38 2
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20 2
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88 2
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49 2
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91 2
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86 2
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42 2
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70 2
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19 2
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84 2
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52 2
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71 2
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93 2
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58 2
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22 2
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50 2
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21 2
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73 2
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68 2
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23 2
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94 2
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10 2
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41 2
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80 2
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64 2
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57 2
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1 2
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55 2
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67 2
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14 2
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15 2
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59 2
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28 2
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74 2
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4 2
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65 2
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6 2
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44 2
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3 2
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
b
diff -r 000000000000 -r 71f98f6f68af test-data/test-abund-read-2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.fa Wed Nov 11 09:45:40 2015 -0500
b
b'@@ -0,0 +1,2002 @@\n+>895:1:37:17593:9954/1\n+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACG
GGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n
+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGG'..b'\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCT
CAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq
\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n'
b
diff -r 000000000000 -r 71f98f6f68af test-data/test-abund-read-2.fa.below
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.fa.below Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,2 @@
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
b
diff -r 000000000000 -r 71f98f6f68af test-data/test-abund-read-2.large.oxlicg
b
Binary file test-data/test-abund-read-2.large.oxlicg has changed
b
diff -r 000000000000 -r 71f98f6f68af test-data/test-abund-read-2.large.oxlicg.info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.large.oxlicg.info Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,4 @@
+through test-abund-read-2.fa
+Total number of unique k-mers: 83
+fp rate estimated to be 0.000
+
b
diff -r 000000000000 -r 71f98f6f68af test-data/test-abund-read-2.oxlicg
b
Binary file test-data/test-abund-read-2.oxlicg has changed
b
diff -r 000000000000 -r 71f98f6f68af test-data/test-abund-read-2.oxlicg.info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.oxlicg.info Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,4 @@
+through /home/mcrusoe/khmer/tests/test-data/test-abund-read-2.fa
+Total number of unique k-mers: 98
+fp rate estimated to be 0.000
+
b
diff -r 000000000000 -r 71f98f6f68af test-data/test-abund-read-paired.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-paired.fa Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,12 @@
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954 1::FOO
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954 2::FOO
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
b
diff -r 000000000000 -r 71f98f6f68af tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Nov 11 09:45:40 2015 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="khmer" version="2.0">
+        <repository changeset_revision="f257b2dba774" name="package_khmer_2_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>