Next changeset 1:9e66f77aa094 (2015-07-07) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty |
added:
abundance-dist-single.xml abundance-dist.xml count-median.xml do-partition.xml extract-partitions.xml filter-abund.xml filter-below-abund.py filter-below-abund.xml macros.xml normalize-by-median.xml out out2 repository_dependencies.xml test-data/random-20-a.fa test-data/random-20-a.fa.part test-data/test-abund-read-2.ct test-data/test-abund-read-2.ct.info test-data/test-abund-read-2.fa test-data/test-abund-read-2.nobigcount.ct test-data/test-abund-read-2.nobigcount.ct.info test-data/test-abund-read-paired.fa tool_dependencies.xml |
b |
diff -r 000000000000 -r 0200bae65db6 abundance-dist-single.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abundance-dist-single.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,106 @@ +<tool id="gedlab-khmer-abundance-dist-single" + name="Abundance Distribution (all-in-one)" + version="2.0rc1-1"> + + <description> + Calculate abundance distribution of the k-mers in a given + sequence file. + </description> + <macros> + <token name="@BINARY@">abundance-dist-single.py</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +mkdir output; cd output; +@BINARY@ +@TABLEPARAMS@ +$zero +$bigcount +#if $save_countingtable +--savetable=$optional_output_countingtable +#end if +--report-total-kmers +--squash +@THREADS@ +$input_sequence_filename +$output_histogram_filename +]]> + </command> + + <inputs> + <expand macro="input_sequence_filename" /> + <param name="save_countingtable" + type="boolean" + label="Save the k-mer counting table(s) in a file" + help="(--savetable)" /> + <expand macro="input_zero" /> + <param name="bigcount" + type="boolean" + truevalue="" + falsevalue="--no-bigcount" + checked="true" + help="Count k-mers past 255 (--no-bigcount)" /> + <expand macro="tableinputs" /> + </inputs> + <outputs> + <data name="optional_output_countingtable" + format="ct" + label="${tool.name} k-mer counting table"> + <filter>save_countingtable == True</filter> + </data> + <expand macro="abundance-histogram-output" /> + </outputs> + <tests> + <test> + <param name="input_sequence_filename" value="test-abund-read-2.fa" /> + <param name="type" value="specific" /> + <param name="tablesize_specific" value="1e7" /> + <param name="n_tables" value="2" /> + <param name="ksize" value="17" /> + <param name="no_zero" value="false" /> + <output name="output_histogram_filename"> + <assert_contents> + <has_text text="1 96 96 0.98" /> + <has_text text="1001 2 98 1.0" /> + </assert_contents> + 
</output> + </test> + <test> + <param name="input_sequence_filename" value="test-abund-read-2.fa" /> + <param name="type" value="specific" /> + <param name="tablesize_specific" value="1e7" /> + <param name="n_tables" value="2" /> + <param name="ksize" value="17" /> + <param name="no_zero" value="false" /> + <param name="bigcount" value="false" /> + <output name="output_histogram_filename"> + <assert_contents> + <has_text text="1 96 96 0.98" /> + <has_text text="255 2 98 1.0" /> + </assert_contents> + </output> + </test> + + </tests> + <help><![CDATA[ +Calculate the abundance distribution of k-mers from a single sequence file. + +Note that with :option:`-b` this script is constant memory; in exchange, +k-mer counts will stop at 255. The memory usage of this script with +:option:`-b` will be about 1.15x the product of the :option:`-x` and +:option:`-N` numbers. + +To count k-mers in multiple files use :program:`load_into_counting.py` and +:program:`abundance_dist.py`. +]]> + </help> + <citations> + <expand macro="software-citation" /> + <expand macro="counting-citation" /> + </citations> +</tool> |
b |
diff -r 000000000000 -r 0200bae65db6 abundance-dist.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abundance-dist.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,74 @@ +<tool id="gedlab-khmer-abundance-dist" + name="Abundance Distribution" + version="2.0rc1-1"> + + <description> + Calculate abundance distribution of the k-mers in a given sequence + file using a pre-made k-mer counting table. + </description> + <macros> + <token name="@BINARY@">abundance-dist.py</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +mkdir output; cd output; +@BINARY@ +--squash +## $zero comes from the input_zero macro: empty when checked, --no-zero when unchecked +$zero +$input_counting_table_filename +$input_sequence_filename +$output_histogram_filename +]]> + </command> + + <inputs> + <expand macro="input_counting_table_filename" /> + <expand macro="input_sequence_filename" /> + <expand macro="input_zero" /> + </inputs> + <outputs> + <expand macro="abundance-histogram-output" /> + </outputs> + <tests> + <test> + <param name="input_sequence_filename" value="test-abund-read-2.fa" /> + <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> + <param name="no_zero" value="false" /> + <output name="output_histogram_filename"> + <assert_contents> + <has_line_matching expression="1 96 96 0.98" /> + <has_line_matching expression="1001 2 98 1.0" /> + </assert_contents> + </output> + </test> + <test> + <param name="input_sequence_filename" value="test-abund-read-2.fa" /> + <param name="input_counting_table_filename" value="test-abund-read-2.nobigcount.ct" ftype="ct" /> + <param name="no_zero" value="false" /> + <assert_stderr> + <has_line_matching expression="WARNING: The loaded graph has bigcount" /> + </assert_stderr> + <output name="output_histogram_filename"> + <assert_contents> + <has_line_matching expression="1 96 96 0.98" /> + <has_line_matching expression="255 2 98 1.0" /> + </assert_contents> + </output> + </test> + </tests> + 
<help><![CDATA[ +Calculate abundance distribution of the k-mers in the sequence file using a +pre-made k-mer counting table. +]]> + </help> + <citations> + <expand macro="software-citation" /> + <expand macro="counting-citation" /> + </citations> +</tool> |
b |
diff -r 000000000000 -r 0200bae65db6 count-median.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/count-median.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,64 @@ +<tool id="gedlab-khmer-count-median" + name="Count Median" + version="2.0rc1-1"> + + <description> + Count the median/avg k-mer abundance for each sequence in the + input file, based on the k-mer counts in the given k-mer + counting table. Can be used to estimate expression levels + (mRNAseq) or coverage (genomic/metagenomic). + </description> + <macros> + <token name="@BINARY@">count-median.py</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ +@BINARY@ +$input_counting_table_filename +$input_sequence_filename +$output_summary_filename +]]> + </command> + + <inputs> + <expand macro="input_sequence_filename" /> + <expand macro="input_counting_table_filename" /> + </inputs> + <outputs> + <data name="output_summary_filename" format="txt" + label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" /> + </outputs> + <tests> + <test> + <param name="input_sequence_filename" + value="test-abund-read-2.fa" /> + <param name="input_counting_table_filename" + value="test-abund-read-2.ct" ftype="ct" /> + <output name="output_summary_filename"> + <assert_contents> + <has_line_matching + expression="seq 1001 1001.0 0.0 18" /> + <has_line_matching + expression="895:1:37:17593:9954/1 1 103.803741455 303.702941895 114" /> + </assert_contents> + </output> + </test> + </tests> + <help> +Count the median/avg k-mer abundance for each sequence in the input file, +based on the k-mer counts in the given k-mer counting table. Can be used to +estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The +output file contains sequence id, median, average, stddev, and seq length; +fields are separated by spaces. For khmer 1.x count-median.py will split +sequence names at the first space which means that some sequence formats (e.g. +paired FASTQ in Casava 1.8 format) will yield uninformative names. 
Use +:option:`--csv` to fix this behavior. + </help> + <citations> + <expand macro="software-citation" /> + <expand macro="diginorm-citation" /> + </citations> +</tool> |
b |
diff -r 000000000000 -r 0200bae65db6 do-partition.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/do-partition.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,81 @@ +<tool id="gedlab-khmer-do-partition" + name="Sequence partition all-in-one" + version="2.0rc1-1"> + + <description> + Load, partition, and annotate FAST[AQ] sequences + </description> + <macros> + <token name="@BINARY@">do-partition.py</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ +mkdir -p output; +@BINARY@ +@TABLEPARAMS@ +@THREADS@ +output +#for input in $inputs +$input +#end for ; +## Hand the output.info summary to the "information" dataset and move the *.part files into output/ for discovery. +mv output.info $information ; +mv *.part output/ +]]> + </command> + + <inputs> + <expand macro="input_sequences_filenames" /> + <param name="ksize" + type="integer" + value="20" + label="ksize" + help="k-mer size to use (--ksize/-k)" /> + <param name="n_tables" + type="integer" + min="1" + value="4" + label="n_tables" + help="number of tables to use (--n_tables/-N)" /> + <param name="tablesize_specific" + type="text" + label="tablesize" + help="lower bound on the tablesize to use (--min-tablesize/-x)" /> + </inputs> + <outputs> + <data name="information" + format="text" + label="${tool.name} summary for #echo ','.join(map(str, $inputs ))#" /> + <expand macro="output_sequences" /> + </outputs> + <tests> + <test> + <param name="inputs" value="random-20-a.fa"/> + <output name="output"> + <discovered_dataset designation="random-20-a.fa.part"> + <assert_contents> + <has_text text='>35 2' /> + </assert_contents> + </discovered_dataset> + </output> + </test> + </tests> + <help><![CDATA[ +Load in a set of sequences, partition them, merge the partitions, and +annotate the original sequences files with the partition information. + +This script combines the functionality of :program:`load-graph.py`, +:program:`partition-graph.py`, :program:`merge-partitions.py`, and +:program:`annotate-partitions.py` into one script. This is convenient +but should probably not be used for large data sets, because +:program:`do-partition.py` doesn't provide save/resume functionality. 
+]]> + </help> + <citations> + <expand macro="software-citation" /> + <expand macro="graph-citation" /> + </citations> +</tool> |
b |
diff -r 000000000000 -r 0200bae65db6 extract-partitions.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract-partitions.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,78 @@ +<tool id="gedlab-khmer-extract-partitions" + name="Extract partitions" + version="2.0rc1-1"> + + <description> + Separate sequences that are annotated with partitions into + grouped files. + </description> + <macros> + <token name="@BINARY@">extract-partitions.py</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ +mkdir -p output ; +cd output ; +@BINARY@ +--max-size $max_size +--min-partition-size $min_partition_size +$output_unassigned +output +#for input in $inputs +$input +#end for +; +mv output.dist $distribution +]]> + </command> + + <inputs> + <expand macro="input_sequences_filenames" /> + <param name="max_size" + type="integer" + label="Max group size" + help="No more than this many number of sequences will be stored in each output (--max-size/-X)" + value="1000000" /> + <param name="min_partition_size" + type="integer" + label="Min partition size" + help="The minimum partition size worth keeping (--min-partition-size/-m)" + value="5" /> + <param name="output_unassigned" + type="boolean" + checked="false" + truevalue="--output-unassigned" + falsevalue="" + label="Output unassigned sequences (--output-unassigned/-U)" /> + </inputs> + <outputs> + <data name="distribution" + format="text" + label="Partition size distribution from ${tool.name}" /> + <expand macro="output_sequences" /> + </outputs> + + <tests> + <test> + <param name="inputs" value="random-20-a.fa.part"/> + <output name="distribution"> + <assert_contents> + <has_line_matching + expression="90 1 3 98" /> + </assert_contents> + </output> + </test> + + </tests> + <help><![CDATA[ +Separate sequences that are annotated with partitions into grouped files. +]]> + </help> + <citations> + <expand macro="software-citation" /> + <expand macro="graph-citation" /> + </citations> +</tool> |
b |
diff -r 000000000000 -r 0200bae65db6 filter-abund.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter-abund.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,93 @@ +<tool id="gedlab-khmer-filter-abund" + name="Filter by abundance" + version="2.0rc1-1"> + + <description> + Trims fastq/fasta sequences at k-mers of a given abundance + based on a provided k-mer counting table. + </description> + <macros> + <token name="@BINARY@">filter-abund.py</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ +mkdir output; cd output; +@BINARY@ +#if $cutoff != 2 + --cutoff=$cutoff +#end if +$variable_coverage +@THREADS@ +$input_counting_table_filename +#for input in $inputs + $input +#end for +--out $output +]]> + </command> + + <inputs> + <expand macro="input_sequences_filenames" /> + <param name="variable_coverage" + type="boolean" + checked="false" + truevalue="--variable-coverage" + falsevalue="" + label="Variable coverage" + help="Only trim when a sequence has high enough coverage; median abundance > 20 (--variable-coverage)" /> + <param name="cutoff" + type="integer" + value="2" + label="cutoff" + help="Trim at k-mers below this abundance. 
(--cutoff)" /> + <expand macro="input_counting_table_filename" /> + </inputs> + <outputs> + <!-- <expand macro="output_sequences" /> --> + <expand macro="output_sequences_single" /> + </outputs> + <tests> + <test> + <param name="inputs" value="test-abund-read-2.fa" /> + <param name="input_counting_table_filename" + value="test-abund-read-2.ct" ftype="ct" /> + <output name="output"> + <!-- <discover_dataset name="test-abund-read-2.fa.abundfilt"> --> + <assert_contents> + <has_text text="GGTTGACGGGGCTCAGGG" /> + </assert_contents> + <!-- </discover_dataset> --> + </output> + </test> + <!-- Same data as the first test, but with a non-default cutoff. --> + <test> + <param name="inputs" + value="test-abund-read-2.fa" /> + <param name="input_counting_table_filename" + value="test-abund-read-2.ct" ftype="ct" /> + <param name="cutoff" value="1" /> + <output name="output"> + <!-- <discover_dataset name="test-abund-read-2.fa.abundfilt"> --> + <assert_contents> + <has_text text="GGTTGACGGGGCTCAGGG" /> + </assert_contents> + <!-- </discover_dataset> --> + </output> + </test> + </tests> + <help><![CDATA[ +Trim sequences at a minimum k-mer abundance. + +Trimmed sequences will be placed in ${input_sequence_filename}.abundfilt +for each input sequence file. If the input sequences are from RNAseq or +metagenome sequencing then :option:`--variable-coverage` should be used. +]]> + </help> + <citations> + <expand macro="software-citation" /> + <expand macro="counting-citation" /> + </citations> +</tool> |
b |
diff -r 000000000000 -r 0200bae65db6 filter-below-abund.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter-below-abund.py Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,59 @@ +#! /usr/bin/env python +# +# This file is part of khmer, https://github.com/dib-lab/khmer/, and is +# Copyright (C) Michigan State University, 2009-2015. It is licensed under +# the three-clause BSD license; see LICENSE. +# Contact: khmer-project@idyll.org +# +from __future__ import print_function +import sys +import os +import khmer +from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter + +WORKER_THREADS = 8 +GROUPSIZE = 100 + +CUTOFF = 50 + +### + + +def main(): + counting_ht = sys.argv[1] + infiles = sys.argv[2:] + + print('file with ht: %s' % counting_ht) + print('-- settings:') + print('N THREADS', WORKER_THREADS) + print('--') + + print('making hashtable') + ht = khmer.load_counting_hash(counting_ht) + K = ht.ksize() + + for infile in infiles: + print('filtering', infile) + outfile = os.path.basename(infile) + '.below' + + outfp = open(outfile, 'w') + + def process_fn(record, ht=ht): + name = record['name'] + seq = record['sequence'] + if 'N' in seq: + return None, None + + trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF) + + if trim_at >= K: + return name, trim_seq + + return None, None + + tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS, GROUPSIZE) + + tsp.start(verbose_fasta_iter(infile), outfp) + +if __name__ == '__main__': + main() |
b |
diff -r 000000000000 -r 0200bae65db6 filter-below-abund.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter-below-abund.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,65 @@ +<tool id="gedlab-khmer-filter-below-abund" + name="Filter below abundance cutoff of 50" + version="2.0rc1-1"> + +<!-- Work in progress, gating on filter-below-abund.py being upgraded --> + <description> + Trims fastq/fasta sequences at k-mers with abundance below 50 + based on a provided k-mer counting table. + </description> + <macros> + <token name="@BINARY@">filter-below-abund.py</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command> +mkdir output; cd output; +@BINARY@ +$input_counting_table_filename +#for input in $inputs + $input +#end for + </command> + + <inputs> + <expand macro="input_sequences_filenames" /> + <expand macro="input_counting_table_filename" /> + </inputs> + <outputs> + <!-- <expand macro="output_sequences" /> --> + <expand macro="output_sequences_single" /> + </outputs> + <!-- <tests> + <test> + <param name="inputs" value="test-abund-read-2.fa" /> + <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> + <output name="output"> + <discover_dataset name="test-abund-read-2.fa.abundfilt"> + </discover_dataset> + </output> + </test> + <test> + <param name="input_sequence_filename" value="test-abund-read-2.fa" /> + <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> + <param name="cutoff" value="1" /> + <output name="output"> + <discover_dataset name="test-abund-read-2.fa.abundfilt"> + <assert_contents> + <has_text text="GGTTGACGGGGCTCAGGG" /> + </assert_contents> + </discover_dataset> + </output> + </test> + </tests> --> + <citations> + <expand macro="software-citation" /> + <expand macro="counting-citation" /> + </citations> + <!-- [OPTIONAL] ReST Help displayed in Galaxy --> + <!-- + <help> + </help> + --> +</tool> |
b |
diff -r 000000000000 -r 0200bae65db6 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,160 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.0rc1">khmer</requirement> + </requirements> + </xml> + <xml name="version"> + <version_command>@BINARY@ --version</version_command> + </xml> + <token name="@TABLEPARAMS@">#if $parameters.type == "simple" + --ksize=20 + --n_tables=4 + --max-tablesize=$parameters.tablesize +#else + --ksize=$parameters.ksize + --n_tables=$parameters.n_tables + --max-tablesize="$parameters.tablesize_specific" + #end if</token> + <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token> + <xml name="tableinputs"> + <conditional name="parameters"> + <param name="type" + type="select" + label="Advanced Parameters" + help="ksize, n_tables, a specific tablesize" > + <option value="simple" + selected="true"> + Hide + </option> + <option value="specific"> + Show + </option> + </param> + <when value="simple"> + <param name="tablesize" + type="select" + label="Sample Type" + display="radio"> + <option value="1e9" + selected="true"> + Microbial Genome + </option> + <option value="2e9"> + Animal Transcriptome + </option> + <option value="4e9"> + Small Animal Genome or + Low-Diversity Metagenome + </option> + <option value="16e9"> + Large Animal Genome + </option> + </param> + </when> + <when value="specific"> + <param name="ksize" + type="integer" + value="20" + label="ksize" + help="k-mer size to use" /> + <param name="n_tables" + type="integer" + min="1" + value="4" + label="n_tables" + help="number of tables to use" /> + <param name="tablesize_specific" + type="text" + label="tablesize" + help="lower bound on the tablesize to use" /> + </when> + </conditional> + </xml> + <xml name="input_sequences_filenames"> + <param name="inputs" + multiple="true" + type="data" + format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" + label="FAST[AQ] file(s)" + help="Put in order of precedence such as longest reads first." 
/> + </xml> + <xml name="input_sequence_filename"> + <param name="input_sequence_filename" + type="data" + format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" + label="FAST[AQ] file(s)" /> + </xml> + <xml name="input_counting_table_filename"> + <param name="input_counting_table_filename" + type="data" + format="ct" + label="the k-mer counting table to query" + help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." /> + </xml> + <xml name="abundance-histogram-output"> + <data name="output_histogram_filename" + format="txt" + label="${tool.name} k-mer abundance histogram. The + columns are: (1) k-mer abundance, (2) k-mer count, (3) + cumulative count, (4) fraction of total distinct + k-mers."> + </data> + </xml> + <xml name="output_sequences"> + <data name="output" + format_source="inputs" + label="${tool.name} processed nucleotide sequence file"> + <discover_datasets pattern="__name__" directory="output" visible="true"/> + </data> + </xml> + <xml name="output_sequences_single"> + <data name="output" + format_source="input_sequence_filename" + label="${tool.name} processed nucleotide sequence file" /> + </xml> + <xml name="input_zero"> + <param name="zero" + type="boolean" + truevalue="" + falsevalue="--no-zero" + checked="true" + help="Output zero count bins (--no-zero)" /> + </xml> + <xml name="software-citation"> + <citation type="bibtex">@article{khmer2014, + author = "Crusoe, Michael and Edvenson, Greg and Fish, Jordan and Howe, + Adina and McDonald, Eric and Nahum, Joshua and Nanlohy, Kaben and + Ortiz-Zuazaga, Humberto and Pell, Jason and Simpson, Jared and Scott, Camille + and Srinivasan, Ramakrishnan Rajaram and Zhang, Qingpeng and Brown, C. 
Titus", + title = "The khmer software package: enabling efficient sequence + analysis", + year = "2014", + month = "04", + publisher = "Figshare", + url = "http://dx.doi.org/10.6084/m9.figshare.979190" + }</citation> + </xml> + <xml name="diginorm-citation"> + <citation type="bibtex">@unpublished{diginorm, + author = "Brown, C Titus and Howe, Adina and Zhang, Qingpeng and Pyrkosz, +Alexis B and Brom, Timothy H", + title = "A Reference-Free Algorithm for Computational Normalization of +Shotgun Sequencing Data", + year = "2012", + eprint = "arXiv:1203.4802", + url = "http://arxiv.org/abs/1203.4802", +}</citation></xml> + <xml name="graph-citation"> + <citation type="doi">10.1073/pnas.1121464109</citation> + </xml> + <xml name="counting-citation"> + <citation type="doi">10.1371/journal.pone.0101271</citation> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:" + level="fatal" /> + </stdio> + </xml> +</macros> |
b |
diff -r 000000000000 -r 0200bae65db6 normalize-by-median.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/normalize-by-median.xml Tue Jul 07 12:29:45 2015 -0400 |
[ |
@@ -0,0 +1,151 @@ +<tool id="gedlab-khmer-normalize-by-median" + name="Normalize By Median" + version="2.0rc1-1"> + + <description> + Filters a fastq/fasta file using digital normalization via + median k-mer abundances. + </description> + <macros> + <token name="@BINARY@">normalize-by-median.py</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ +mkdir output; +cd output; +@BINARY@ +$paired_switch +@TABLEPARAMS@ +--cutoff=$cutoff +#if $save_countingtable +--savetable=$countingtable +#end if +#if $countingtable_to_load +--loadtable=$countingtable_to_load +#end if +--report-total-kmers +## Every dataset of every "input(s) set" repeat is passed as a positional argument. +#for entry in $many_inputs +#for input in $entry.inputs +$input +#end for +#end for +--out=$output +]]> + </command> + <inputs> + <repeat name="many_inputs" title="input(s) set" min="1" default="1"> + <expand macro="input_sequences_filenames" /> + </repeat> + <param name="paired_switch" + type="boolean" + checked="false" + truevalue="--paired" + falsevalue="" + label="Are the inputs interleaved paired ends?" + help="(--paired) If so, then selecting this option will process the paired ends together." /> + + <param name="countingtable_to_load" + type="data" + format="ct" + optional="true" + label="an optional k-mer counting table to load" + help="(--loadtable) The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." 
/> + + <param name="save_countingtable" + type="boolean" + label="Save the k-mer counting table(s) in a file" + help="(--savetable)" /> + <param name="cutoff" + type="integer" + min="1" + value="20" + label="cutoff" + help="(--cutoff)"/> + <expand macro="tableinputs" /> + </inputs> + <outputs> + <data name="countingtable" + format="ct" + label="${tool.name} k-mer counting table"> + <filter>save_countingtable == True</filter> + </data> + <!-- <expand macro="output_sequences" /> --> + <expand macro="output_sequences_single" /> + </outputs> + <tests> + <test> + <conditional name="parameters"> + <param name="type" value="specific" /> + <param name="inputs" value="test-abund-read-2.fa"/> + <param name="cutoff" value="1" /> + <param name="ksize" value="17" /> + </conditional> + <output name="output"> + <discover_dataset name="test-abund-read-2.fa.keep"> + <assert_contents> + <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" /> + </assert_contents> + </discover_dataset> + </output> + </test> + <test> + <param name="inputs" value="test-abund-read-2.fa" /> + <param name="cutoff" value="2" /> + <param name="ksize" value="17" /> + <output name="output"> + <discover_dataset name="test-abund-read-2.fa.keep"> + <assert_contents> + <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" /> + <has_line_matching expression="GGTTGACGGGGCTCAGGG" /> + </assert_contents> + </discover_dataset> + </output> + </test> + <test> + <param name="inputs" value="test-abund-read-paired.fa" /> + <param name="cutoff" value="1" /> + <param name="ksize" value="17" /> + <param name="paired_switch" value="true" /> + <output name="output"> + <discover_dataset name="test-abund-read-paired.fa.keep"> + <assert_contents> + <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" /> + <has_line_matching expression="GGTTGACGGGGCTCAGGG" /> + </assert_contents> + </discover_dataset> + </output> + </test> + + </tests> + <help><![CDATA[ +Do digital normalization (remove mostly redundant sequences) + +Discard sequences based 
on whether or not their median k-mer abundance lies +above a specified cutoff. Kept sequences will be placed in <fileN>.keep. + +Paired end reads will be considered together if :option:`-p` is set. If +either read will be kept, then both will be kept. This should result in +keeping (or discarding) each sequencing fragment. This helps with retention +of repeats, especially. + +With :option:`-s`/:option:`--savetable`, the k-mer counting table +will be saved to the specified file after all sequences have been +processed. With :option:`-d`, the k-mer counting table will be +saved every d files for multifile runs; if :option:`-s` is set, +the specified name will be used, and if not, the name `backup.ct` +will be used. :option:`-l`/:option:`--loadtable` will load the +specified k-mer counting table before processing the specified +files. Note that these tables are in the same format as those +produced by :program:`load-into-counting.py` and consumed by +:program:`abundance-dist.py`. +]]> + </help> + <citations> + <expand macro="software-citation" /> + <expand macro="diginorm-citation" /> + </citations> +</tool> |
b |
diff -r 000000000000 -r 0200bae65db6 out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/out Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +1 96 96 0.98 +1001 2 98 1.0 |
b |
diff -r 000000000000 -r 0200bae65db6 out2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/out2 Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +1 96 96 0.98 +255 2 98 1.0 |
b |
diff -r 000000000000 -r 0200bae65db6 repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="This requires the Count and Presence Table datatype definitions."> + <repository changeset_revision="3641a7d3b7c4" name="oxli_datatypes" owner="crusoe" toolshed="https://toolshed.g2.bx.psu.edu" /> +</repositories> |
b |
diff -r 000000000000 -r 0200bae65db6 test-data/random-20-a.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/random-20-a.fa Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,198 @@ +>35 +CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC +>16 +CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT +>46 +GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT +>40 +GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG +>33 +GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG +>98 +ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC +>17 +CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA +>89 +GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA +>30 +GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA +>82 +ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT +>60 +GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA +>83 +CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG +>12 +AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG +>85 +CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC +>2 +CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC +>45 +ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA +>11 +GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG +>39 +CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA +>26 +AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA +>75 +GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG +>81 +GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG +>97 +ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC +>13 +AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT +>92 +ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG +>56 +AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA +>61 +TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA +>96 +ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG +>31 +CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC +>29 +TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT +>54 +TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC 
+>0 +TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT +>90 +GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG +>34 +TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG +>43 +AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG +>8 +ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG +>37 +TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC +>51 +ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG +>32 +GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG +>78 +TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC +>18 +CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG +>36 +TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA +>53 +ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC +>24 +AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC +>7 +AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC +>9 +AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT +>47 +CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA +>62 +ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA +>79 +TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG +>48 +TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC +>66 +GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA +>25 +GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC +>5 +TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC +>72 +ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT +>76 +CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT +>69 +GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT +>87 +CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT +>27 +TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC +>77 +TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC +>95 +TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC +>63 +TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC +>38 
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG +>20 +GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC +>88 +GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT +>49 +TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG +>91 +TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG +>86 +CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA +>42 +CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC +>70 +ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG +>19 +GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG +>84 +AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG +>52 +TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT +>71 +AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA +>93 +CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA +>58 +TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT +>22 +TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG +>50 +ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG +>21 +TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT +>73 +CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC +>68 +CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT +>23 +GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT +>94 +AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA +>10 +TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA +>41 +GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC +>80 +TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG +>64 +AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA +>57 +TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC +>1 +GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT +>55 +GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC +>67 +GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT +>14 +CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG +>15 
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT +>59 +TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA +>28 +CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA +>74 +CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA +>4 +TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC +>65 +TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT +>6 +ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG +>44 +CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC +>3 +TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG |
b |
diff -r 000000000000 -r 0200bae65db6 test-data/random-20-a.fa.part --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/random-20-a.fa.part Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,198 @@ +>35 2 +CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC +>16 3 +CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT +>46 3 +GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT +>40 2 +GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG +>33 3 +GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG +>98 0 +ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC +>17 2 +CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA +>89 3 +GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA +>30 2 +GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA +>82 2 +ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT +>60 2 +GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA +>83 2 +CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG +>12 3 +AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG +>85 2 +CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC +>2 2 +CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC +>45 3 +ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA +>11 4 +GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG +>39 4 +CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA +>26 2 +AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA +>75 2 +GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG +>81 2 +GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG +>97 2 +ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC +>13 2 +AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT +>92 2 +ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG +>56 2 +AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA +>61 2 +TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA +>96 2 +ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG +>31 2 +CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC +>29 2 +TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT +>54 2 
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC +>0 2 +TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT +>90 2 +GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG +>34 2 +TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG +>43 2 +AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG +>8 2 +ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG +>37 2 +TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC +>51 2 +ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG +>32 2 +GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG +>78 2 +TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC +>18 2 +CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG +>36 2 +TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA +>53 2 +ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC +>24 2 +AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC +>7 2 +AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC +>9 2 +AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT +>47 2 +CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA +>62 2 +ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA +>79 2 +TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG +>48 2 +TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC +>66 2 +GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA +>25 2 +GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC +>5 2 +TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC +>72 2 +ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT +>76 2 +CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT +>69 2 +GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT +>87 2 +CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT +>27 2 +TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC +>77 2 +TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC +>95 2 
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC +>63 2 +TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC +>38 2 +CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG +>20 2 +GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC +>88 2 +GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT +>49 2 +TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG +>91 2 +TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG +>86 2 +CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA +>42 2 +CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC +>70 2 +ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG +>19 2 +GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG +>84 2 +AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG +>52 2 +TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT +>71 2 +AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA +>93 2 +CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA +>58 2 +TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT +>22 2 +TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG +>50 2 +ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG +>21 2 +TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT +>73 2 +CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC +>68 2 +CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT +>23 2 +GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT +>94 2 +AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA +>10 2 +TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA +>41 2 +GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC +>80 2 +TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG +>64 2 +AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA +>57 2 +TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC +>1 2 +GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT +>55 2 
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC +>67 2 +GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT +>14 2 +CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG +>15 2 +AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT +>59 2 +TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA +>28 2 +CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA +>74 2 +CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA +>4 2 +TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC +>65 2 +TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT +>6 2 +ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG +>44 2 +CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC +>3 2 +TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG |
b |
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.ct |
b |
Binary file test-data/test-abund-read-2.ct has changed |
b |
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.ct.info --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-2.ct.info Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,3 @@ +through test-data/test-abund-read-2.fa +fp rate estimated to be 0.000 + |
b |
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-2.fa Tue Jul 07 12:29:45 2015 -0400 |
b |
b'@@ -0,0 +1,2002 @@\n+>895:1:37:17593:9954/1\n+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACG
GGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n
+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGG'..b'\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCT
CAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq
\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n+>seq\n+GGTTGACGGGGCTCAGGG\n' |
b |
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.nobigcount.ct |
b |
Binary file test-data/test-abund-read-2.nobigcount.ct has changed |
b |
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-2.nobigcount.ct.info --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-2.nobigcount.ct.info Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,3 @@ +through test-data/test-abund-read-2.fa +fp rate estimated to be 0.000 + |
b |
diff -r 000000000000 -r 0200bae65db6 test-data/test-abund-read-paired.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-paired.fa Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,12 @@ +>895:1:37:17593:9954/1 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954/2 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954 1::FOO +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954 2::FOO +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954/1 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954/2 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG |
b |
diff -r 000000000000 -r 0200bae65db6 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Jul 07 12:29:45 2015 -0400 |
b |
@@ -0,0 +1,15 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="khmer" version="2.0rc1"> + <install version="1.0"> + <actions> + <action type="setup_python_environment"> + <repository changeset_revision="44bb4258922f" name="package_python_2_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"> + <package name="python" version="2.7" /> + </repository> + <package>https://pypi.python.org/packages/source/k/khmer/khmer-2.0rc1.tar.gz#md5=d8ea5e3ba34de0380007c74d61fc6d1a</package> + </action> + </actions> + </install> + </package> +</tool_dependency> |