Repository 'khmer'
hg clone https://toolshed.g2.bx.psu.edu/repos/crusoe/khmer

Changeset 0:0200bae65db6 (2015-07-07)
Next changeset 1:9e66f77aa094 (2015-07-07)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
added:
abundance-dist-single.xml
abundance-dist.xml
count-median.xml
do-partition.xml
extract-partitions.xml
filter-abund.xml
filter-below-abund.py
filter-below-abund.xml
macros.xml
normalize-by-median.xml
out
out2
repository_dependencies.xml
test-data/random-20-a.fa
test-data/random-20-a.fa.part
test-data/test-abund-read-2.ct
test-data/test-abund-read-2.ct.info
test-data/test-abund-read-2.fa
test-data/test-abund-read-2.nobigcount.ct
test-data/test-abund-read-2.nobigcount.ct.info
test-data/test-abund-read-paired.fa
tool_dependencies.xml
b
diff -r 000000000000 -r 0200bae65db6 abundance-dist-single.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/abundance-dist-single.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,106 @@
+<tool id="gedlab-khmer-abundance-dist-single"
+ name="Abundance Distribution (all-in-one)"
+ version="2.0rc1-1">
+
+ <description>
+ Calculate abundance distribution of the k-mers in a given
+ sequence file.
+ </description>
+ <macros>
+ <token name="@BINARY@">abundance-dist-single.py</token>
+ <import>macros.xml</import>
+ </macros>
+ <expand macro="requirements" />
+ <expand macro="stdio" />
+ <expand macro="version" />
+ <command><![CDATA[
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+mkdir output; cd output;
+@BINARY@
+@TABLEPARAMS@
+$zero
+$bigcount
+#if $save_countingtable
+--savetable=$optional_output_countingtable
+#end if
+--report-total-kmers
+--squash
+@THREADS@
+$input_sequence_filename
+$output_histogram_filename
+]]>
+ </command>
+
+ <inputs>
+ <expand macro="input_sequence_filename" />
+ <param name="save_countingtable"
+ type="boolean"
+ label="Save the k-mer counting table(s) in a file"
+ help="(--savetable)" />
+ <expand macro="input_zero" />
+ <param name="bigcount"
+ type="boolean"
+ truevalue=""
+ falsevalue="--no-bigcount"
+ checked="true"
+ help="Count k-mers past 255 (--no-bigcount)" />
+ <expand macro="tableinputs" />
+ </inputs>
+ <outputs>
+ <data name="optional_output_countingtable"
+ format="ct"
+ label="${tool.name} k-mer counting table">
+ <filter>save_countingtable == True</filter>
+ </data>
+ <expand macro="abundance-histogram-output" />
+ </outputs>
+    <tests>
+        <!-- bigcount left at its default (checked): the abundance-1001 bin is expected unchanged. -->
+        <test>
+            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+            <param name="type" value="specific" />
+            <param name="tablesize_specific" value="1e7" />
+            <param name="n_tables" value="2" />
+            <param name="ksize" value="17" />
+            <!-- The input_zero macro names its boolean "zero"; true adds no flag to the command line. -->
+            <param name="zero" value="true" />
+            <output name="output_histogram_filename">
+                <assert_contents>
+                    <has_text text="1 96 96 0.98" />
+                    <has_text text="1001 2 98 1.0" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- bigcount unchecked adds the no-bigcount flag: the top bin is expected at 255. -->
+        <test>
+            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+            <param name="type" value="specific" />
+            <param name="tablesize_specific" value="1e7" />
+            <param name="n_tables" value="2" />
+            <param name="ksize" value="17" />
+            <param name="zero" value="true" />
+            <param name="bigcount" value="false" />
+            <output name="output_histogram_filename">
+                <assert_contents>
+                    <has_text text="1 96 96 0.98" />
+                    <has_text text="255 2 98 1.0" />
+                </assert_contents>
+            </output>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+Calculate the abundance distribution of k-mers from a single sequence file.
+
+Note that with :option:`-b` this script is constant memory; in exchange,
+k-mer counts will stop at 255. The memory usage of this script with
+:option:`-b` will be about 1.15x the product of the :option:`-x` and
+:option:`-N` numbers.
+
+To count k-mers in multiple files use :program:`load-into-counting.py` and
+:program:`abundance-dist.py`.
+]]>
+    </help>
+    <citations>
+ <expand macro="software-citation" />
+ <expand macro="counting-citation" />
+    </citations>
+</tool>
b
diff -r 000000000000 -r 0200bae65db6 abundance-dist.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/abundance-dist.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,72 @@
+<tool id="gedlab-khmer-abundance-dist"
+ name="Abundance Distribution"
+ version="2.0rc1-1">
+
+ <description>
+ Calculate abundance distribution of the k-mers in a given sequence
+ file using a pre-made k-mer counting table.
+ </description>
+        <macros>
+ <token name="@BINARY@">abundance-dist.py</token>
+ <import>macros.xml</import>
+        </macros>
+ <expand macro="requirements" />
+ <expand macro="stdio" />
+ <expand macro="version" />
+ <command><![CDATA[
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+mkdir output; cd output;
+@BINARY@
+--squash
+## The "zero" checkbox (input_zero macro) expands to nothing when checked and
+## to --no-zero when unchecked; options go before the positional file names.
+$zero
+$input_counting_table_filename
+$input_sequence_filename
+$output_histogram_filename
+]]>
+ </command>
+
+ <inputs>
+ <expand macro="input_counting_table_filename" />
+ <expand macro="input_sequence_filename" />
+ <expand macro="input_zero" />
+ </inputs>
+ <outputs>
+ <expand macro="abundance-histogram-output" />
+ </outputs>
+ <tests>
+     <!-- test-abund-read-2.ct: the abundance-1001 bin is expected unchanged. -->
+     <test>
+         <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+         <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
+         <!-- The input_zero macro names its boolean "zero"; true adds no flag to the command line. -->
+         <param name="zero" value="true" />
+         <output name="output_histogram_filename">
+             <assert_contents>
+                 <has_line_matching expression="1 96 96 0.98" />
+                 <has_line_matching expression="1001 2 98 1.0" />
+             </assert_contents>
+         </output>
+     </test>
+     <!-- test-abund-read-2.nobigcount.ct: a warning is expected on stderr and the top bin at 255. -->
+     <test>
+         <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+         <param name="input_counting_table_filename" value="test-abund-read-2.nobigcount.ct" ftype="ct" />
+         <param name="zero" value="true" />
+         <assert_stderr>
+             <has_line_matching expression="WARNING: The loaded graph has bigcount" />
+         </assert_stderr>
+         <output name="output_histogram_filename">
+             <assert_contents>
+                 <has_line_matching expression="1 96 96 0.98" />
+                 <has_line_matching expression="255 2 98 1.0" />
+             </assert_contents>
+         </output>
+     </test>
+ </tests>
+ <help><![CDATA[
+Calculate abundance distribution of the k-mers in the sequence file using a
+pre-made k-mer counting table.
+]]>
+ </help>
+ <citations>
+ <expand macro="software-citation" />
+ <expand macro="counting-citation" />
+ </citations>
+</tool>
b
diff -r 000000000000 -r 0200bae65db6 count-median.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/count-median.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,64 @@
+<tool id="gedlab-khmer-count-median"
+ name="Count Median"
+ version="2.0rc1-1">
+
+ <description>
+ Count the median/avg k-mer abundance for each sequence in the
+ input file, based on the k-mer counts in the given k-mer
+ counting table. Can be used to estimate expression levels
+ (mRNAseq) or coverage (genomic/metagenomic).
+ </description>
+        <macros>
+ <token name="@BINARY@">count-median.py</token>
+ <import>macros.xml</import>
+        </macros>
+ <expand macro="requirements" />
+ <expand macro="stdio" />
+ <expand macro="version" />
+ <command><![CDATA[
+@BINARY@
+$input_counting_table_filename
+$input_sequence_filename
+$output_summary_filename
+]]>
+ </command>
+
+ <inputs>
+ <expand macro="input_sequence_filename" />
+ <expand macro="input_counting_table_filename" />
+ </inputs>
+ <outputs>
+ <!-- Summary file handed to count-median.py as its third argument. The label
+      uses the input dataset's display name rather than the dataset object. -->
+ <data name="output_summary_filename" format="txt"
+ label="${input_sequence_filename.name} sequence id, median, average, stddev, and seq length" />
+ </outputs>
+ <tests>
+ <test>
+ <param name="input_sequence_filename"
+ value="test-abund-read-2.fa" />
+ <param name="input_counting_table_filename"
+ value="test-abund-read-2.ct" ftype="ct" />
+                        <output name="output_summary_filename">
+                                <assert_contents>
+ <has_line_matching
+ expression="seq 1001 1001.0 0.0 18" />
+ <has_line_matching
+ expression="895:1:37:17593:9954/1 1 103.803741455 303.702941895 114" />
+                                </assert_contents>
+                        </output>
+ </test>
+ </tests>
+ <help>
+Count the median/avg k-mer abundance for each sequence in the input file,
+based on the k-mer counts in the given k-mer counting table. Can be used to
+estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The
+output file contains sequence id, median, average, stddev, and seq length;
+fields are separated by spaces. For khmer 1.x count-median.py will split
+sequence names at the first space which means that some sequence formats (e.g.
+paired FASTQ in Casava 1.8 format) will yield uninformative names. Use
+:option:`--csv` to fix this behavior.
+ </help>
+ <citations>
+ <expand macro="software-citation" />
+ <expand macro="diginorm-citation" />
+ </citations>
+</tool>
b
diff -r 000000000000 -r 0200bae65db6 do-partition.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/do-partition.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,80 @@
+<tool id="gedlab-khmer-do-partition"
+ name="Sequence partition all-in-one"
+ version="2.0rc1-1">
+
+ <description>
+ Load, partition, and annotate FAST[AQ] sequences
+ </description>
+        <macros>
+                <!-- The BINARY token is used by both the command and the version macro, so it must name the real script. -->
+                <token name="@BINARY@">do-partition.py</token>
+                <import>macros.xml</import>
+        </macros>
+ <expand macro="requirements" />
+ <expand macro="stdio" />
+ <expand macro="version" />
+ <command><![CDATA[
+## The table options are spelled out from this tool's own ksize, n_tables and
+## tablesize_specific inputs: the shared TABLEPARAMS token reads a "parameters"
+## conditional that this tool does not declare.
+mkdir -p output;
+@BINARY@
+--ksize=$ksize
+--n_tables=$n_tables
+## tablesize_specific is free text without a default; omit the option when blank.
+#if str($tablesize_specific).strip()
+--max-tablesize="$tablesize_specific"
+#end if
+@THREADS@
+output
+#for input in $inputs
+$input
+#end for
+## Keep the shell separator on its own line, away from the end-for directive.
+;
+mv output.info $information ;
+mv *.part output/
+]]>
+ </command>
+
+ <inputs>
+ <expand macro="input_sequences_filenames" />
+ <!-- Table sizing options; the help text names the flags the khmer 2.0 wrappers pass. -->
+ <param name="ksize"
+ type="integer"
+ value="20"
+ label="ksize"
+ help="k-mer size to use (--ksize/-k)" />
+ <param name="n_tables"
+ type="integer"
+ min="1"
+ value="4"
+ label="n_tables"
+ help="number of tables to use (--n_tables/-N)" />
+ <param name="tablesize_specific"
+ type="text"
+ label="tablesize"
+ help="upper bound on the tablesize to use (--max-tablesize/-x)" />
+ </inputs>
+ <outputs>
+ <!-- "txt" is the plain-text format the other khmer tool outputs in this repository declare. -->
+ <data name="information"
+ format="txt"
+ label="${tool.name} summary for #echo ','.join(map(str, $inputs ))#" />
+ <expand macro="output_sequences" />
+ </outputs>
+ <tests>
+ <test>
+                        <param name="inputs" value="random-20-a.fa"/>
+ <output name="output">
+ <discovered_dataset designation="random-20-a.fa.part">
+ <assert_contents>
+ <has_text text='>35     2' />
+ </assert_contents>
+ </discovered_dataset>
+                        </output>
+                </test>
+ </tests>
+ <help><![CDATA[
+Load in a set of sequences, partition them, merge the partitions, and
+annotate the original sequences files with the partition information.
+
+This script combines the functionality of :program:`load-graph.py`,
+:program:`partition-graph.py`, :program:`merge-partitions.py`, and
+:program:`annotate-partitions.py` into one script. This is convenient
+but should probably not be used for large data sets, because
+:program:`do-partition.py` doesn't provide save/resume functionality.
+]]>
+ </help>
+ <citations>
+ <expand macro="software-citation" />
+ <expand macro="graph-citation" />
+ </citations>
+</tool>
b
diff -r 000000000000 -r 0200bae65db6 extract-partitions.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract-partitions.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,78 @@
+<tool id="gedlab-khmer-extract-partitions"
+ name="Extract partitions"
+ version="2.0rc1-1">
+
+ <description>
+ Separate sequences that are annotated with partitions into
+ grouped files.
+ </description>
+        <macros>
+                <token name="@BINARY@">extract-partitions.py</token>
+                <import>macros.xml</import>
+        </macros>
+ <expand macro="requirements" />
+ <expand macro="stdio" />
+ <expand macro="version" />
+ <command><![CDATA[
+mkdir -p output ;
+cd output ;
+@BINARY@
+--max-size $max_size
+--min-partition-size $min_partition_size
+$output_unassigned
+output
+#for input in $inputs
+$input
+#end for
+;
+mv output.dist $distribution
+]]>
+ </command>
+
+ <inputs>
+ <expand macro="input_sequences_filenames" />
+ <param name="max_size"
+ type="integer"
+ label="Max group size"
+ help="No more than this many number of sequences will be stored in each output (--max-size/-X)"
+ value="1000000" />
+ <param name="min_partition_size"
+ type="integer"
+ label="Min partition size"
+ help="The minimum partition size worth keeping (--min-partition-size/-m)"
+ value="5" />
+ <param name="output_unassigned"
+ type="boolean"
+ checked="false"
+ truevalue="--output-unassigned"
+ falsevalue=""
+ label="Output unassigned sequences (--output-unassigned/-U)" />
+ </inputs>
+ <outputs>
+ <!-- The command moves output.dist onto this dataset; "txt" matches the
+      plain-text format declared by the other khmer tool outputs. -->
+ <data name="distribution"
+ format="txt"
+ label="Partition size distribution from ${tool.name}" />
+ <expand macro="output_sequences" />
+ </outputs>
+
+ <tests>
+ <test>
+ <param name="inputs" value="random-20-a.fa.part"/>
+ <output name="distribution">
+ <assert_contents>
+ <has_line_matching
+ expression="90 1 3 98" />
+ </assert_contents>
+ </output>
+ </test>
+
+ </tests>
+ <help><![CDATA[
+Separate sequences that are annotated with partitions into grouped files.
+]]>
+ </help>
+ <citations>
+ <expand macro="software-citation" />
+ <expand macro="graph-citation" />
+ </citations>
+</tool>
b
diff -r 000000000000 -r 0200bae65db6 filter-abund.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-abund.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,92 @@
+<tool id="gedlab-khmer-filter-abund"
+ name="Filter by abundance"
+ version="2.0rc1-1">
+
+ <description>
+ Trims fastq/fasta sequences at k-mers of a given abundance
+ based on a provided k-mer counting table.
+ </description>
+ <macros>
+ <token name="@BINARY@">filter-abund.py</token>
+ <import>macros.xml</import>
+ </macros>
+ <expand macro="requirements" />
+ <expand macro="stdio" />
+ <expand macro="version" />
+ <command><![CDATA[
+## The cutoff is always passed explicitly (as normalize-by-median does) instead
+## of comparing the parameter wrapper against an integer to decide whether the
+## flag can be left out.
+mkdir output; cd output;
+@BINARY@
+--cutoff=$cutoff
+$variable_coverage
+@THREADS@
+$input_counting_table_filename
+#for input in $inputs
+ $input
+#end for
+--out $output
+]]>
+ </command>
+
+ <inputs>
+ <expand macro="input_sequences_filenames" />
+ <param name="variable_coverage"
+ type="boolean"
+ checked="false"
+ truevalue="--variable-coverage"
+ falsevalue=""
+ label="Variable coverage"
+ help="Only trim when a sequence has high enough coverage; median abundance > 20 (--variable_coverage)" />
+ <param name="cutoff"
+ type="integer"
+ value="2"
+ label="cutoff"
+ help="Trim at k-mers below this abundance. (--cutoff)" />
+ <expand macro="input_counting_table_filename" />
+ </inputs>
+ <outputs>
+ <!-- <expand macro="output_sequences" /> -->
+ <expand macro="output_sequences_single" />
+ </outputs>
+ <tests>
+     <!-- The sequence input of this tool is the multi-file "inputs" parameter;
+          the command writes to the declared "output" dataset, so it is asserted directly. -->
+     <test>
+         <param name="inputs" value="test-abund-read-2.fa" />
+         <param name="input_counting_table_filename"
+                value="test-abund-read-2.ct" ftype="ct" />
+         <output name="output">
+             <assert_contents>
+                 <has_text text="GGTTGACGGGGCTCAGGG" />
+             </assert_contents>
+         </output>
+     </test>
+     <!-- Same data with an explicit cutoff of 1. -->
+     <test>
+         <param name="inputs" value="test-abund-read-2.fa" />
+         <param name="input_counting_table_filename"
+                value="test-abund-read-2.ct" ftype="ct" />
+         <param name="cutoff" value="1" />
+         <output name="output">
+             <assert_contents>
+                 <has_text text="GGTTGACGGGGCTCAGGG" />
+             </assert_contents>
+         </output>
+     </test>
+ </tests>
+ <help><![CDATA[
+Trim sequences at a minimum k-mer abundance.
+
+Trimmed sequences will be placed in <input_sequence_filename>.abundfilt
+for each input sequence file. If the input sequences are from RNAseq or
+metagenome sequencing then :option:`--variable-coverage` should be used.
+]]>
+ </help>
+ <citations>
+ <expand macro="software-citation" />
+ <expand macro="counting-citation" />
+ </citations>
+</tool>
b
diff -r 000000000000 -r 0200bae65db6 filter-below-abund.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-below-abund.py Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,59 @@
+#! /usr/bin/env python
+#
+# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
+# Contact: khmer-project@idyll.org
+#
+from __future__ import print_function
+import sys
+import os
+import khmer
+from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter
+
+WORKER_THREADS = 8
+GROUPSIZE = 100
+
+CUTOFF = 50
+
+###
+
+
+def main():
+    counting_ht = sys.argv[1]
+    infiles = sys.argv[2:]
+
+    print('file with ht: %s' % counting_ht)
+    print('-- settings:')
+    print('N THREADS', WORKER_THREADS)
+    print('--')
+
+    print('making hashtable')
+    ht = khmer.load_counting_hash(counting_ht)
+    K = ht.ksize()
+
+    for infile in infiles:
+        print('filtering', infile)
+        outfile = os.path.basename(infile) + '.below'
+
+        outfp = open(outfile, 'w')
+
+        def process_fn(record, ht=ht):
+            name = record['name']
+            seq = record['sequence']
+            if 'N' in seq:
+                return None, None
+
+            trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)
+
+            if trim_at >= K:
+                return name, trim_seq
+
+            return None, None
+
+        tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS, GROUPSIZE)
+
+        tsp.start(verbose_fasta_iter(infile), outfp)
+
+if __name__ == '__main__':
+    main()
b
diff -r 000000000000 -r 0200bae65db6 filter-below-abund.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-below-abund.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,65 @@
+<tool id="gedlab-khmer-filter-below-abund"
+ name="Filter below abundance cutoff of 50"
+ version="2.0rc1-1">
+
+<!-- Work in progress, gating on filter-below-abund.py being upgraded -->
+ <description>
+ Trims fastq/fasta sequences at k-mers with abundance below 50
+ based on a provided k-mer counting table.
+ </description>
+ <macros>
+ <token name="@BINARY@">filter-below-abund.py</token>
+ <import>macros.xml</import>
+ </macros>
+ <expand macro="requirements" />
+ <expand macro="stdio" />
+ <expand macro="version" />
+ <!-- Creates and enters an "output" directory, then runs the BINARY token
+      (filter-below-abund.py) with the counting table followed by every
+      selected sequence file.
+      NOTE(review): filter-below-abund.py in this changeset writes its results
+      to "<basename>.below" in the working directory, and this command never
+      references the declared "output" dataset; confirm how the results are
+      meant to be collected before enabling this tool. -->
+ <command>
+mkdir output; cd output;
+@BINARY@
+$input_counting_table_filename
+#for input in $inputs
+ $input
+#end for
+ </command>
+
+ <inputs>
+ <expand macro="input_sequences_filenames" />
+ <expand macro="input_counting_table_filename" />
+ </inputs>
+ <outputs>
+ <!-- <expand macro="output_sequences" /> -->
+ <expand macro="output_sequences_single" />
+ </outputs>
+ <!--        <tests>
+                <test>
+                        <param name="inputs" value="test-abund-read-2.fa" />
+                        <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
+                        <output name="output">
+ <discover_dataset name="test-abund-read-2.fa.abundfilt">
+ </discover_dataset>
+                        </output>
+                </test>
+                <test>
+                        <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+                        <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
+ <param name="cutoff" value="1" />
+                        <output name="output">
+ <discover_dataset name="test-abund-read-2.fa.abundfilt">
+                                 <assert_contents>
+                                         <has_text text="GGTTGACGGGGCTCAGGG" />
+                                 </assert_contents>
+ </discover_dataset>
+                        </output>
+                </test>
+ </tests> -->
+ <citations>
+ <expand macro="software-citation" />
+ <expand macro="counting-citation" />
+ </citations>
+ <!-- [OPTIONAL] ReST Help displayed in Galaxy -->
+    <!--
+ <help>
+ </help>
+    -->
+</tool>
b
diff -r 000000000000 -r 0200bae65db6 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,160 @@
+<macros>
+ <xml name="requirements">
+ <requirements>
+ <requirement type="package" version="2.0rc1">khmer</requirement>
+ </requirements>
+ </xml>
+ <xml name="version">
+ <version_command>@BINARY@ --version</version_command>
+ </xml>
+ <token name="@TABLEPARAMS@">#if $parameters.type == "simple"
+  --ksize=20
+  --n_tables=4
+  --max-tablesize=$parameters.tablesize
+#else
+  --ksize=$parameters.ksize
+  --n_tables=$parameters.n_tables
+  --max-tablesize="$parameters.tablesize_specific"
+  #end if</token>
+ <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token>
+ <xml name="tableinputs">
+ <conditional name="parameters">
+ <param name="type"
+ type="select"
+ label="Advanced Parameters"
+ help="ksize, n_tables, a specific tablesize" >
+ <option value="simple"
+ selected="true">
+ Hide
+ </option>
+ <option value="specific">
+ Show
+ </option>
+             </param>
+ <when value="simple">
+ <param name="tablesize"
+ type="select"
+ label="Sample Type"
+ display="radio">
+ <option value="1e9"
+ selected="true">
+ Microbial Genome
+ </option>
+ <option value="2e9">
+ Animal Transcriptome
+ </option>
+ <option value="4e9">
+ Small Animal Genome or
+ Low-Diversity Metagenome
+ </option>
+ <option value="16e9">
+ Large Animal Genome
+ </option>
+ </param>
+ </when>
+ <when value="specific">
+ <!-- These three values are substituted into the ksize, n_tables and
+      max-tablesize options by the TABLEPARAMS token. -->
+ <param name="ksize"
+ type="integer"
+ value="20"
+ label="ksize"
+ help="k-mer size to use (--ksize/-k)" />
+ <param name="n_tables"
+ type="integer"
+ min="1"
+ value="4"
+ label="n_tables"
+ help="number of tables to use (--n_tables/-N)" />
+ <param name="tablesize_specific"
+ type="text"
+ label="tablesize"
+ help="upper bound on the tablesize to use (--max-tablesize/-x)" />
+ </when>
+ </conditional>
+ </xml>
+ <xml name="input_sequences_filenames">
+                <param  name="inputs"
+                        multiple="true"
+                        type="data"
+                        format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
+                        label="FAST[AQ] file(s)"
+                        help="Put in order of precedence such as longest reads first." />
+ </xml>
+ <xml name="input_sequence_filename">
+                <param  name="input_sequence_filename"
+                        type="data"
+                        format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
+                        label="FAST[AQ] file(s)" />
+ </xml>
+ <xml name="input_counting_table_filename">
+                <param  name="input_counting_table_filename"
+ type="data"
+ format="ct"
+                        label="the k-mer counting table to query"
+                        help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." />
+ </xml>
+ <xml name="abundance-histogram-output">
+                <data   name="output_histogram_filename"
+                        format="txt"
+                        label="${tool.name} k-mer abundance histogram. The
+                        columns are: (1) k-mer abundance, (2) k-mer count, (3)
+                        cumulative count, (4) fraction of total distinct
+                        k-mers.">
+                </data>
+ </xml>
+ <xml name="output_sequences">
+                <data   name="output"
+                        format_source="inputs"
+                        label="${tool.name} processed nucleotide sequence file">
+                        <discover_datasets pattern="__name__" directory="output" visible="true"/>
+                </data>
+ </xml>
+ <xml name="output_sequences_single">
+                <!-- Single result dataset. Its datatype follows the "inputs" data
+                     parameter (input_sequences_filenames macro), the same source
+                     the output_sequences macro uses. -->
+                <data   name="output"
+                        format_source="inputs"
+                        label="${tool.name} processed nucleotide sequence file" />
+ </xml>
+ <xml name="input_zero">
+                <param  name="zero"
+                        type="boolean"
+                        truevalue=""
+                        falsevalue="--no-zero"
+                        checked="true" 
+                        help="Output zero count bins (--no-zero)" />
+ </xml>
+ <xml name="software-citation">
+ <citation type="bibtex">@article{khmer2014,
+      author = "Crusoe, Michael and Edvenson, Greg and Fish, Jordan and Howe,
+  Adina and McDonald, Eric and Nahum, Joshua and Nanlohy, Kaben and
+  Ortiz-Zuazaga, Humberto and Pell, Jason and Simpson, Jared and Scott, Camille
+  and Srinivasan, Ramakrishnan Rajaram and Zhang, Qingpeng and Brown, C. Titus",
+      title = "The khmer software package: enabling efficient sequence
+  analysis",
+      year = "2014",
+      month = "04",
+      publisher = "Figshare",
+      url = "http://dx.doi.org/10.6084/m9.figshare.979190"
+  }</citation>
+ </xml>
+ <xml name="diginorm-citation">
+ <citation type="bibtex">@unpublished{diginorm,
+    author = "Brown, C Titus and Howe, Adina and Zhang, Qingpeng and Pyrkosz,
+Alexis B and Brom, Timothy H",
+    title = "A Reference-Free Algorithm for Computational Normalization of
+Shotgun Sequencing Data",
+    year = "2012",
+    eprint = "arXiv:1203.4802",
+    url = "http://arxiv.org/abs/1203.4802",
+}</citation></xml>
+ <xml name="graph-citation">
+ <citation type="doi">10.1073/pnas.1121464109</citation>
+ </xml>
+ <xml name="counting-citation">
+ <citation type="doi">10.1371/journal.pone.0101271</citation>
+ </xml>
+ <xml name="stdio">
+ <stdio>
+ <exit_code range="1:"
+ level="fatal" />
+ </stdio>
+ </xml>
+</macros>
b
diff -r 000000000000 -r 0200bae65db6 normalize-by-median.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/normalize-by-median.xml Tue Jul 07 12:29:45 2015 -0400
[
@@ -0,0 +1,150 @@
+<tool id="gedlab-khmer-normalize-by-median"
+ name="Normalize By Median"
+ version="2.0rc1-1">
+
+ <description>
+ Filters a fastq/fasta file using digital normalization via
+      median k-mer abundances.
+ </description>
+        <macros>
+                <token name="@BINARY@">normalize-by-median.py</token>
+                <import>macros.xml</import>
+        </macros>
+        <expand macro="requirements" />
+ <expand macro="stdio" />
+ <expand macro="version" />
+ <command><![CDATA[
+## Uses the BINARY token like the other khmer tools so that the command line
+## and the version macro always name the same script.
+mkdir output;
+cd output;
+@BINARY@
+$paired_switch
+@TABLEPARAMS@
+--cutoff=$cutoff
+#if $save_countingtable
+--savetable=$countingtable
+#end if
+#if $countingtable_to_load
+--loadtable=$countingtable_to_load
+#end if
+--report-total-kmers
+## Every file of every "many_inputs" repeat block is passed, in order.
+#for entry in $many_inputs
+#for input in $entry.inputs
+$input
+#end for
+#end for
+--out=$output
+]]>
+ </command>
+ <inputs>
+ <repeat name="many_inputs" title="input(s) set" min="1" default="1">
+ <expand macro="input_sequences_filenames" />
+ </repeat>
+ <param name="paired_switch"
+ type="boolean"
+ checked="false"
+ truevalue="--paired"
+ falsevalue=""
+ label="Are the inputs interleaved paired ends?"
+ help="(--paired) If so, then selecting this option will process the paired ends together." />
+
+ <param name="countingtable_to_load"
+ type="data"
+ format="ct"
+ optional="true"
+ label="an optional k-mer counting table to load"
+ help="(--loadtable) The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." />
+
+ <param name="save_countingtable"
+ type="boolean"
+ label="Save the k-mer counting table(s) in a file"
+ help="(--savetable)" />
+ <param name="cutoff"
+ type="integer"
+ min="1"
+ value="20"
+ label="cutoff"
+ help="(--cutoff)"/>
+ <expand macro="tableinputs" />
+ </inputs>
+ <outputs>
+ <data name="countingtable"
+ format="ct"
+ label="${tool.name} k-mer counting table">
+ <filter>save_countingtable == True</filter>
+ </data>
+ <!-- <expand macro="output_sequences" /> -->
+ <expand macro="output_sequences_single" />
+ </outputs>
+ <tests>
+     <!-- "inputs" sits in the many_inputs repeat, "cutoff" and "paired_switch" are
+          top-level parameters, and ksize is only read when the "parameters"
+          conditional is set to "specific". tablesize_specific has no default, so it
+          is set the same way the abundance-dist-single tests set it. The command
+          writes to the declared "output" dataset, so it is asserted directly. -->
+     <test>
+         <repeat name="many_inputs">
+             <param name="inputs" value="test-abund-read-2.fa" />
+         </repeat>
+         <param name="cutoff" value="1" />
+         <conditional name="parameters">
+             <param name="type" value="specific" />
+             <param name="ksize" value="17" />
+             <param name="tablesize_specific" value="1e7" />
+         </conditional>
+         <output name="output">
+             <assert_contents>
+                 <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" />
+             </assert_contents>
+         </output>
+     </test>
+     <test>
+         <repeat name="many_inputs">
+             <param name="inputs" value="test-abund-read-2.fa" />
+         </repeat>
+         <param name="cutoff" value="2" />
+         <conditional name="parameters">
+             <param name="type" value="specific" />
+             <param name="ksize" value="17" />
+             <param name="tablesize_specific" value="1e7" />
+         </conditional>
+         <output name="output">
+             <assert_contents>
+                 <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" />
+                 <has_line_matching expression="GGTTGACGGGGCTCAGGG" />
+             </assert_contents>
+         </output>
+     </test>
+     <!-- Interleaved paired input: the boolean is named "paired_switch". -->
+     <test>
+         <repeat name="many_inputs">
+             <param name="inputs" value="test-abund-read-paired.fa" />
+         </repeat>
+         <param name="cutoff" value="1" />
+         <param name="paired_switch" value="true" />
+         <conditional name="parameters">
+             <param name="type" value="specific" />
+             <param name="ksize" value="17" />
+             <param name="tablesize_specific" value="1e7" />
+         </conditional>
+         <output name="output">
+             <assert_contents>
+                 <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" />
+                 <has_line_matching expression="GGTTGACGGGGCTCAGGG" />
+             </assert_contents>
+         </output>
+     </test>
+
+ </tests>
+ <help><![CDATA[
+Do digital normalization (remove mostly redundant sequences)
+
+Discard sequences based on whether or not their median k-mer abundance lies
+above a specified cutoff. Kept sequences will be placed in <fileN>.keep.
+
+Paired end reads will be considered together if :option:`-p` is set. If
+either read will be kept, then both will be kept. This should result in
+keeping (or discarding) each sequencing fragment. This helps with retention
+of repeats, especially.
+
+With :option:`-s`/:option:`--savetable`, the k-mer counting table
+will be saved to the specified file after all sequences have been
+processed. With :option:`-d`, the k-mer counting table will be
+saved every d files for multifile runs; if :option:`-s` is set,
+the specified name will be used, and if not, the name `backup.ct`
+will be used.  :option:`-l`/:option:`--loadtable` will load the
+specified k-mer counting table before processing the specified
+files.  Note that these tables are are in the same format as those
+produced by :program:`load-into-counting.py` and consumed by
+:program:`abundance-dist.py`.
+]]>
+ </help>
+ <citations>
+ <expand macro="software-citation" />
+ <expand macro="diginorm-citation" />
+ </citations>
+</tool>
b
diff -r 000000000000 -r 0200bae65db6 out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/out Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,2 @@
+1 96 96 0.98
+1001 2 98 1.0
b
diff -r 000000000000 -r 0200bae65db6 out2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/out2 Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,2 @@
+1 96 96 0.98
+255 2 98 1.0
b
diff -r 000000000000 -r 0200bae65db6 repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the Count and Presence Table datatype definitions.">
+    <repository name="oxli_datatypes" owner="crusoe" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="3641a7d3b7c4" />
+</repositories>
b
diff -r 000000000000 -r 0200bae65db6 test-data/random-20-a.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.fa Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,198 @@
+>35
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
b
diff -r 000000000000 -r 0200bae65db6 test-data/random-20-a.fa.part
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.fa.part Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,198 @@
+>35 2
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16 3
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46 3
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40 2
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33 3
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98 0
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17 2
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89 3
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30 2
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82 2
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60 2
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83 2
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12 3
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85 2
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2 2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45 3
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11 4
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39 4
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26 2
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75 2
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81 2
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97 2
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13 2
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92 2
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56 2
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61 2
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96 2
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31 2
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29 2
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54 2
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0 2
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90 2
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34 2
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43 2
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8 2
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37 2
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51 2
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32 2
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78 2
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18 2
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36 2
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53 2
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24 2
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7 2
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9 2
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47 2
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62 2
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79 2
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48 2
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66 2
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25 2
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5 2
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72 2
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76 2
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69 2
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87 2
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27 2
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77 2
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95 2
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63 2
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38 2
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20 2
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88 2
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49 2
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91 2
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86 2
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42 2
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70 2
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19 2
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84 2
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52 2
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71 2
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93 2
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58 2
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22 2
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50 2
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21 2
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73 2
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68 2
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23 2
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94 2
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10 2
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41 2
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80 2
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64 2
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57 2
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1 2
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55 2
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67 2
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14 2
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15 2
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59 2
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28 2
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74 2
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4 2
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65 2
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6 2
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44 2
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3 2
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
b
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.ct
b
Binary file test-data/test-abund-read-2.ct has changed
b
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.ct.info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.ct.info Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,3 @@
+through test-data/test-abund-read-2.fa
+fp rate estimated to be 0.000
+
b
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.fa Tue Jul 07 12:29:45 2015 -0400
b
b'@@ -0,0 +1,2002 @@\n+>895:1:37:17593:9954/1\n+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACG
GGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n
+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGG'..b'\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCT
CAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq
\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n'
b
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.nobigcount.ct
b
Binary file test-data/test-abund-read-2.nobigcount.ct has changed
b
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.nobigcount.ct.info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.nobigcount.ct.info Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,3 @@
+through test-data/test-abund-read-2.fa
+fp rate estimated to be 0.000
+
b
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-paired.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-paired.fa Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,12 @@
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954 1::FOO
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954 2::FOO
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
b
diff -r 000000000000 -r 0200bae65db6 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Jul 07 12:29:45 2015 -0400
b
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="khmer" version="2.0rc1">
+        <install version="1.0">
+            <actions>
+                <action type="setup_python_environment">
+                    <repository name="package_python_2_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="44bb4258922f">
+                        <package name="python" version="2.7" />
+                    </repository>
+                    <package>https://pypi.python.org/packages/source/k/khmer/khmer-2.0rc1.tar.gz#md5=d8ea5e3ba34de0380007c74d61fc6d1a</package>
+                </action>
+            </actions>
+        </install>
+    </package>
+</tool_dependency>