Galaxy |

Changeset 14:3d90a3a78c3b (2015-09-12)

Previous changeset 13:48d7f2580fe5 (2015-09-11) Next changeset 15:2c4635f5be47 (2015-09-18)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit ba6a4d1acd2df42baf4a012bdaac4f6c6051b8dc

modified:
abundance-dist-single.xml
abundance-dist.xml
count-median.xml
filter-abund.xml
filter-below-abund.xml
macros.xml
normalize-by-median.xml

added:
normalize-by-median.cwl
test-data/test-abund-read-2.oxlicg
test-data/test-abund-read-2.oxlicg.info

removed:
abundance-dist-single.norm.xml
filter-abund.norm.xml
test-data/test-abund-read-2.ct
test-data/test-abund-read-2.ct.info
test-data/test-abund-read-2.nobigcount.ct
test-data/test-abund-read-2.nobigcount.ct.info

diff -r 48d7f2580fe5 -r 3d90a3a78c3b abundance-dist-single.norm.xml
--- a/abundance-dist-single.norm.xml Fri Sep 11 14:43:53 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,297 +0,0 @@\n-<tool id="gedlab-khmer-abundance-dist-single" name="Abundance Distribution (all-in-one)" version="2.0-3">\n-\n-\t<description>\n-\t\tCalculate abundance distribution of the k-mers in a given\n-\t\tsequence file.\n-\t</description>\n-\t<macros>\n-\t\t<macro name="requirements" type="xml">\n-\t\t<requirements>\n-\t\t\t<requirement type="package" version="2.0">khmer</requirement>\n-\t\t</requirements>\n-\t</macro>\n-\t<macro name="version" type="xml">\n-\t\t<version_command>abundance-dist-single.py --version</version_command>\n-\t</macro>\n-\t<macro name="tableinputs" type="xml">\n-\t\t<conditional name="parameters">\n-\t\t\t<param help="ksize, n_tables, a specific tablesize" label="Advanced Parameters" name="type" type="select">\n-\t\t\t\t<option selected="true" value="simple">\n-\t\t\t\t\tHide\n-\t\t\t\t</option>\n-\t\t\t\t<option value="specific">\n-\t\t\t\t\tShow\n-\t\t\t\t</option>\n- \t\t</param>\n-\t\t\t<when value="simple">\n-\t\t\t\t<param display="radio" label="Sample Type" name="tablesize" type="select">\n-\t\t\t\t\t<option selected="true" value="1e9">\n-\t\t\t\t\t\tMicrobial Genome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="2e9">\n-\t\t\t\t\t\tAnimal Transcriptome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="4e9">\n-\t\t\t\t\t\tSmall Animal Genome or\n-\t\t\t\t\t\tLow-Diversity Metagenome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="16e9">\n-\t\t\t\t\t\tLarge Animal Genome\n-\t\t\t\t\t</option>\n-\t\t\t\t</param>\n-\t\t\t</when>\n-\t\t\t<when value="specific">\n-\t\t\t\t<param help="k-mer size to use" label="ksize" name="ksize" type="integer" value="20" />\n-\t\t\t\t<param help="number of tables to use" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n-\t\t\t\t<param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n-\t\t\t</when>\n-\t\t</conditional>\n-\t</macro>\n-\t<macro name="input_sequences_filenames" type="xml">\n- <param 
format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" help="Put in order of precedence such as longest reads first." label="FAST[AQ] file(s)" multiple="true" name="inputs" type="data" />\n-\t</macro>\n-\t<macro name="input_sequence_filename" type="xml">\n- <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="FAST[AQ] file(s)" name="input_sequence_filename" type="data" />\n-\t</macro>\n-\t<macro name="input_counting_table_filename" type="xml">\n- <param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n-\t</macro>\n-\t<macro name="abundance-histogram-output" type="xml">\n- <data format="txt" label="${tool.name} k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers." name="output_histogram_filename">\n- </data>\n-\t</macro>\n-\t<macro name="output_sequences" type="xml">\n- <data format_source="inputs" label="${tool.name} processed nucleotide sequence file" name="output">\n- <discover_datasets directory="output" pattern="__name__" visible="true" />\n- </data>\n-\t</macro>\n-\t<macro name="output_sequences_single" type="xml">\n- <data format_source="input_sequence_filename" label="${tool.name} processed nucleotide sequence file" name="output" />\n-\t</macro>\n-\t<macro name="input_zero" type="xml">\n- <param checked="true" falsevalue="--no-zero" help="Output zero count bins (--no-zero)" name="zero" truevalue="" type="boolean" />\n-\t</macro>\n-\t<macro name="software-citation" type="xml">\n-\t\t<citation type="bibtex">@article{khmer2015,\n- author = "Crusoe, Michael R. and Alameldin, Hussien F. 
and Awad, Sherine\n- and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n- Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n- Jacob and Fenzl, T'..b'lp="number of tables to use" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n-\t\t\t\t<param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n-\t\t\t</when>\n-\t\t</conditional>\n-\t</inputs>\n-\t<outputs>\n-\t\t<data format="ct" label="${tool.name} k-mer counting table" name="optional_output_countingtable">\n-\t\t\t<filter>save_countingtable == True</filter>\n-\t\t</data>\n-\t\t<data format="txt" label="${tool.name} k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers." name="output_histogram_filename">\n- </data>\n-\t</outputs>\n- <tests>\n-\t <test>\n-\t\t <param name="input_sequence_filename" value="test-abund-read-2.fa" />\n-\t\t <param name="type" value="specific" />\n-\t\t <param name="tablesize_specific" value="1e7" />\n-\t\t <param name="n_tables" value="2" />\n-\t\t <param name="ksize" value="17" />\n-\t\t <param name="no_zero" value="false" />\n-\t\t <output name="output_histogram_filename">\n-\t\t\t <assert_contents>\n-\t\t\t\t <has_text text="1,96,96,0.98" />\n-\t\t\t\t <has_text text="1001,2,98,1.0" />\n-\t\t\t </assert_contents>\n-\t\t </output>\n-\t </test>\n-\t <test>\n-\t\t <param name="input_sequence_filename" value="test-abund-read-2.fa" />\n-\t\t <param name="type" value="specific" />\n-\t\t <param name="tablesize_specific" value="1e7" />\n-\t\t <param name="n_tables" value="2" />\n-\t\t <param name="ksize" value="17" />\n-\t\t <param name="no_zero" value="false" />\n-\t\t <param name="bigcount" value="false" />\n-\t\t <output name="output_histogram_filename">\n-\t\t\t <assert_contents>\n-\t\t\t\t <has_text text="1,96,96,0.98" />\n-\t\t\t\t <has_text text="255,2,98,1.0" />\n-\t\t\t 
</assert_contents>\n-\t\t </output>\n-\t </test>\n-\n- </tests>\n- <help>\n-Calculate the abundance distribution of k-mers from a single sequence file.\n-\n-Note that with `-b` this script is constant memory; in exchange,\n-k-mer counts will stop at 255. The memory usage of this script with\n-`-b` will be about 1.15x the product of the `-x` and\n-`-N` numbers.\n-\n-To count k-mers in multiple files use `load_into_counting.py` and\n-`abundance_dist.py`.\n-\n- </help>\n- <citations>\n-\t<citation type="bibtex">@article{khmer2015,\n- author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine\n- and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n- Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n- Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and\n- Garland, Phillip and Gluck, Jonathan and González, Iván and Guermond, Sarah\n- and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and\n- Hyer, Alex and Härpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David\n- and Lippi, Justin and Mansour, Tamer and McA\'Nulty, Pamela and McDonald, Eric\n- and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. and Nanlohy,\n- Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory,\n- Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and\n- Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and\n- Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan,\n- Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and\n- Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather\n- L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and\n- Brown, C. 
Titus"\n- title = "The khmer software package: enabling efficient nucleotide\n- sequence analysis",\n- year = "2015",\n- month = "08",\n- publisher = "F1000",\n- url = "http://dx.doi.org/10.12688/f1000research.6924.1"\n- }</citation>\n-\t<citation type="doi">10.1371/journal.pone.0101271</citation>\n-\t</citations>\n-</tool>\n'

diff -r 48d7f2580fe5 -r 3d90a3a78c3b abundance-dist-single.xml
--- a/abundance-dist-single.xml Fri Sep 11 14:43:53 2015 -0400
+++ b/abundance-dist-single.xml Sat Sep 12 21:05:57 2015 -0400

@@ -1,6 +1,6 @@
<tool id="gedlab-khmer-abundance-dist-single"
name="Abundance Distribution (all-in-one)"
- version="2.0-3">
+ version="2.0-4">

<description>
Calculate abundance distribution of the k-mers in a given
@@ -21,8 +21,8 @@
@TABLEPARAMS@
$zero
$bigcount
-#if $save_countingtable
---savetable=$optional_output_countingtable
+#if $save_countgraph
+--savegraph=$optional_output_countgraph
#end if
--squash
@THREADS@
@@ -33,24 +33,19 @@

<inputs>
<expand macro="input_sequence_filename" />
- <param name="save_countingtable"
+ <param name="save_countgraph"
type="boolean"
- label="Save the k-mer counting table(s) in a file"
- help="(--savetable)" />
+ label="Save the k-mer countgraph to a file"
+ help="(--savegraph)" />
<expand macro="input_zero" />
- <param name="bigcount"
- type="boolean"
- truevalue=""
- falsevalue="--no-bigcount"
- checked="true"
- help="Count k-mers past 255 (--no-bigcount)" />
+ <expand macro="input_bigcount" />
<expand macro="tableinputs" />
</inputs>
<outputs>
- <data name="optional_output_countingtable"
- format="ct"
- label="${tool.name} k-mer counting table">
- <filter>save_countingtable == True</filter>
+ <data name="optional_output_countgraph"
+ format="oxlicg"
+ label="${tool.name} k-mer countgraph">
+ <filter>save_countgraph == True</filter>
</data>
<expand macro="abundance-histogram-output" />
</outputs>

diff -r 48d7f2580fe5 -r 3d90a3a78c3b abundance-dist.xml
--- a/abundance-dist.xml Fri Sep 11 14:43:53 2015 -0400
+++ b/abundance-dist.xml Sat Sep 12 21:05:57 2015 -0400

[

@@ -1,10 +1,10 @@
<tool id="gedlab-khmer-abundance-dist"
name="Abundance Distribution"
- version="2.0-2">
+ version="2.0-3">

<description>
Calculate abundance distribution of the k-mers in a given sequence
- file using a pre-made k-mer counting table.
+ file using a pre-made k-mer countgraph.
</description>
         <macros>
<token name="@BINARY@">abundance-dist.py</token>
@@ -19,16 +19,19 @@
mkdir output; cd output;
@BINARY@
--squash
-$input_counting_table_filename
+$zero
+$bigcount
+$input_countgraph_filename
$input_sequence_filename
$output_histogram_filename
]]>
</command>

<inputs>
- <expand macro="input_counting_table_filename" />
+ <expand macro="input_countgraph_filename" />
<expand macro="input_sequence_filename" />
<expand macro="input_zero" />
+ <expand macro="input_bigcount" />
</inputs>
<outputs>
<expand macro="abundance-histogram-output" />
@@ -36,33 +39,31 @@
<tests>
<test>
                      <param name="input_sequence_filename" value="test-abund-read-2.fa" />
- <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
- <param name="no_zero" value="false" />
+ <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+ <param name="zero" value="false" />
                      <output name="output_histogram_filename">
<assert_contents>
- <has_line_matching expression="1,96,96,0.98" />
- <has_line_matching expression="1001,2,98,1.0" />
+ <has_text text="1,96,96,0.98" />
+ <has_text text="1001,2,98,1.0" />
</assert_contents>
</output>
</test>
<test>
                      <param name="input_sequence_filename" value="test-abund-read-2.fa" />
- <param name="input_counting_table_filename" value="test-abund-read-2.nobigcount.ct" ftype="ct" />
- <param name="no_zero" value="false" />
- <assert_stderr>
- <has_line_matching expression="WARNING: The loaded graph has bigcount" />
- </assert_stderr>
+ <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+ <param name="zero" value="false" />
+ <param name="bigcount" value="false" />
                      <output name="output_histogram_filename">
                              <assert_contents>
-                                     <has_line_matching expression="1,96,96,0.98" />
-                                     <has_line_matching expression="255,2,98,1.0" />
+                                     <has_text text="1,96,96,0.98" />
+                                     <has_text text="255,2,98,1.0" />
                              </assert_contents>
                      </output>
              </test>
      </tests>
<help><![CDATA[
Calculate abundance distribution of the k-mers in the sequence file using a
-pre-made k-mer counting table.
+pre-made k-mer countgraph.
]]>
</help>
<citations>

diff -r 48d7f2580fe5 -r 3d90a3a78c3b count-median.xml
--- a/count-median.xml Fri Sep 11 14:43:53 2015 -0400
+++ b/count-median.xml Sat Sep 12 21:05:57 2015 -0400

[

@@ -5,7 +5,7 @@
<description>
Count the median/avg k-mer abundance for each sequence in the
input file, based on the k-mer counts in the given k-mer
- counting table. Can be used to estimate expression levels
+ countgraph. Can be used to estimate expression levels
(mRNAseq) or coverage (genomic/metagenomic).
</description>
         <macros>
@@ -17,7 +17,7 @@
<expand macro="version" />
<command><![CDATA[
@BINARY@
-$input_counting_table_filename
+$input_countgraph_filename
$input_sequence_filename
$output_summary_filename
]]>
@@ -25,7 +25,7 @@

<inputs>
<expand macro="input_sequence_filename" />
- <expand macro="input_counting_table_filename" />
+ <expand macro="input_countgraph_filename" />
</inputs>
<outputs>
<data name="output_summary_filename" format="txt"
@@ -35,21 +35,19 @@
<test>
<param name="input_sequence_filename"
value="test-abund-read-2.fa" />
- <param name="input_counting_table_filename"
- value="test-abund-read-2.ct" ftype="ct" />
+ <param name="input_countgraph_filename"
+ value="test-abund-read-2.oxlicg" ftype="oxlicg" />
                         <output name="output_summary_filename">
                                 <assert_contents>
- <has_line_matching
- expression="seq 1001 1001.0 0.0 18" />
- <has_line_matching
- expression="895:1:37:17593:9954/1 1 103.803741455 303.702941895 114" />
+ <has_text text="seq,1001,1001.0,0.0,18" />
+ <has_text text="895:1:37:17593:9954/1,1,21.408163071,141.391921997,114" />
                                 </assert_contents>
                         </output>
</test>
</tests>
<help>
Count the median/avg k-mer abundance for each sequence in the input file,
-based on the k-mer counts in the given k-mer counting table. Can be used to
+based on the k-mer counts in the given k-mer countgraph. Can be used to
estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The
output file contains sequence id, median, average, stddev, and seq length;
fields are separated by spaces. For khmer 1.x count-median.py will split

diff -r 48d7f2580fe5 -r 3d90a3a78c3b filter-abund.norm.xml
--- a/filter-abund.norm.xml Fri Sep 11 14:43:53 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,241 +0,0 @@\n-<tool id="gedlab-khmer-filter-abund" name="Filter by abundance" version="2.0-3">\n-\n-\t<description>\n-\t\tTrims fastq/fasta sequences at k-mers of a given abundance\n-\t\tbased on a provided k-mer counting table.\n-\t</description>\n-\t<macros>\n-\t\t<macro name="requirements" type="xml">\n-\t\t<requirements>\n-\t\t\t<requirement type="package" version="2.0">khmer</requirement>\n-\t\t</requirements>\n-\t</macro>\n-\t<macro name="version" type="xml">\n-\t\t<version_command>filter-abund.py --version</version_command>\n-\t</macro>\n-\t<macro name="tableinputs" type="xml">\n-\t\t<conditional name="parameters">\n-\t\t\t<param help="ksize, n_tables, a specific tablesize" label="Advanced Parameters" name="type" type="select">\n-\t\t\t\t<option selected="true" value="simple">\n-\t\t\t\t\tHide\n-\t\t\t\t</option>\n-\t\t\t\t<option value="specific">\n-\t\t\t\t\tShow\n-\t\t\t\t</option>\n- \t\t</param>\n-\t\t\t<when value="simple">\n-\t\t\t\t<param display="radio" label="Sample Type" name="tablesize" type="select">\n-\t\t\t\t\t<option selected="true" value="1e9">\n-\t\t\t\t\t\tMicrobial Genome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="2e9">\n-\t\t\t\t\t\tAnimal Transcriptome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="4e9">\n-\t\t\t\t\t\tSmall Animal Genome or\n-\t\t\t\t\t\tLow-Diversity Metagenome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="16e9">\n-\t\t\t\t\t\tLarge Animal Genome\n-\t\t\t\t\t</option>\n-\t\t\t\t</param>\n-\t\t\t</when>\n-\t\t\t<when value="specific">\n-\t\t\t\t<param help="k-mer size to use" label="ksize" name="ksize" type="integer" value="20" />\n-\t\t\t\t<param help="number of tables to use" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n-\t\t\t\t<param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n-\t\t\t</when>\n-\t\t</conditional>\n-\t</macro>\n-\t<macro name="input_sequences_filenames" type="xml">\n- <param 
format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" help="Put in order of precedence such as longest reads first." label="FAST[AQ] file(s)" multiple="true" name="inputs" type="data" />\n-\t</macro>\n-\t<macro name="input_sequence_filename" type="xml">\n- <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="FAST[AQ] file(s)" name="input_sequence_filename" type="data" />\n-\t</macro>\n-\t<macro name="input_counting_table_filename" type="xml">\n- <param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n-\t</macro>\n-\t<macro name="abundance-histogram-output" type="xml">\n- <data format="txt" label="${tool.name} k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers." name="output_histogram_filename">\n- </data>\n-\t</macro>\n-\t<macro name="output_sequences" type="xml">\n- <data format_source="inputs" label="${tool.name} processed nucleotide sequence file" name="output">\n- <discover_datasets directory="output" pattern="__name__" visible="true" />\n- </data>\n-\t</macro>\n-\t<macro name="output_sequences_single" type="xml">\n- <data format_source="input_sequence_filename" label="${tool.name} processed nucleotide sequence file" name="output" />\n-\t</macro>\n-\t<macro name="input_zero" type="xml">\n- <param checked="true" falsevalue="--no-zero" help="Output zero count bins (--no-zero)" name="zero" truevalue="" type="boolean" />\n-\t</macro>\n-\t<macro name="software-citation" type="xml">\n-\t\t<citation type="bibtex">@article{khmer2015,\n- author = "Crusoe, Michael R. and Alameldin, Hussien F. 
and Awad, Sherine\n- and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n- Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n- Jacob and Fenzl, Thomas '..b'sequence has high enough coverage; median abundance > 20 (--variable_coverage)" label="Variable coverage" name="variable_coverage" truevalue="--variable-coverage" type="boolean" />\n-\t\t<param help="Trim at k-mers below this abundance. (--cutoff)" label="cutoff" name="cutoff" type="integer" value="2" />\n-\t\t<param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n-\t</inputs>\n-\t<outputs>\n-\t\t<data format_source="inputs" label="${tool.name} processed nucleotide sequence file" name="output">\n- <discover_datasets directory="output" pattern="__name__" visible="true" />\n- </data>\n-\t</outputs>\n-\t<tests>\n- <test>\n- <param name="inputs" value="test-abund-read-2.fa" />\n-\t\t\t<param ftype="ct" name="input_counting_table_filename" value="test-abund-read-2.ct" />\n- <output name="output">\n-\t\t\t\t<discover_dataset name="test-abund-read-2.fa.abundfilt"> \n- \t<assert_contents>\n- \t<has_text text="GGTTGACGGGGCTCAGGG" />\n- \t</assert_contents>\n-\t\t\t\t</discover_dataset>\n- </output>\n- </test>\n- <test>\n-\t\t\t<param name="inputs" value="test-abund-read-2.fa" />\n-\t\t\t<param ftype="ct" name="input_counting_table_filename" value="test-abund-read-2.ct" />\n-\t\t\t<param name="cutoff" value="1" />\n- <output name="output">\n-\t\t\t\t<discover_dataset name="test-abund-read-2.fa.abundfilt">\n- \t<assert_contents>\n- \t<has_text text="GGTTGACGGGGCTCAGGG" />\n- \t</assert_contents>\n-\t\t\t\t</discover_dataset>\n- </output>\n- </test>\n- </tests>\n-\t<help>\n-Trim sequences at a minimum k-mer abundance.\t\t\n-\t\t\n-Trimmed sequences will be placed in 
${input_sequence_filename}.abundfilt\n-for each input sequence file. If the input sequences are from RNAseq or\n-metagenome sequencing then `--variable-coverage` should be used.\n-\n-\t</help>\n-\t<citations>\n-\t\t<citation type="bibtex">@article{khmer2015,\n- author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine\n- and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n- Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n- Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and\n- Garland, Phillip and Gluck, Jonathan and González, Iván and Guermond, Sarah\n- and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and\n- Hyer, Alex and Härpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David\n- and Lippi, Justin and Mansour, Tamer and McA\'Nulty, Pamela and McDonald, Eric\n- and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. and Nanlohy,\n- Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory,\n- Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and\n- Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and\n- Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan,\n- Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and\n- Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather\n- L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and\n- Brown, C. Titus"\n- title = "The khmer software package: enabling efficient nucleotide\n- sequence analysis",\n- year = "2015",\n- month = "08",\n- publisher = "F1000",\n- url = "http://dx.doi.org/10.12688/f1000research.6924.1"\n- }</citation>\n-\t<citation type="doi">10.1371/journal.pone.0101271</citation>\n-\t</citations>\n-</tool>\n'

diff -r 48d7f2580fe5 -r 3d90a3a78c3b filter-abund.xml
--- a/filter-abund.xml Fri Sep 11 14:43:53 2015 -0400
+++ b/filter-abund.xml Sat Sep 12 21:05:57 2015 -0400

@@ -4,7 +4,7 @@

<description>
Trims fastq/fasta sequences at k-mers of a given abundance
- based on a provided k-mer counting table.
+ based on a provided k-mer countgraph.
</description>
<macros>
<token name="@BINARY@">filter-abund.py</token>
@@ -21,7 +21,7 @@
#end if
$variable_coverage
@THREADS@
-$input_counting_table_filename
+$input_countgraph_filename
#for input in $inputs
  $input
#end for
@@ -42,7 +42,7 @@
value="2"
label="cutoff"
help="Trim at k-mers below this abundance. (--cutoff)" />
- <expand macro="input_counting_table_filename" />
+ <expand macro="input_countgraph_filename" />
</inputs>
<outputs>
<expand macro="output_sequences" />
@@ -50,8 +50,8 @@
<tests>
                 <test>
                         <param name="inputs" value="test-abund-read-2.fa" />
- <param name="input_counting_table_filename"
- value="test-abund-read-2.ct" ftype="ct" />
+ <param name="input_countgraph_filename"
+ value="test-abund-read-2.oxlicg" ftype="oxlicg" />
                         <output name="output">
<discover_dataset name="test-abund-read-2.fa.abundfilt">
                                  <assert_contents>
@@ -62,8 +62,8 @@
                 </test>
                 <test>
<param name="inputs" value="test-abund-read-2.fa" />
- <param name="input_counting_table_filename"
- value="test-abund-read-2.ct" ftype="ct" />
+ <param name="input_countgraph_filename"
+ value="test-abund-read-2.oxlicg" ftype="oxlicg" />
<param name="cutoff" value="1" />
                         <output name="output">
<discover_dataset name="test-abund-read-2.fa.abundfilt">

diff -r 48d7f2580fe5 -r 3d90a3a78c3b filter-below-abund.xml
--- a/filter-below-abund.xml Fri Sep 11 14:43:53 2015 -0400
+++ b/filter-below-abund.xml Sat Sep 12 21:05:57 2015 -0400

@@ -4,7 +4,7 @@

<description>
Trims fastq/fasta sequences at k-mers with abundance below 50
- based on a provided k-mer counting table.
+ based on a provided k-mer countgraph.
</description>
<macros>
<token name="@BINARY@">filter-below-abund.py</token>
@@ -16,7 +16,7 @@
<command>
mkdir output; cd output;
@BINARY@
-$input_counting_table_filename
+$input_countgraph_filename
#for input in $inputs
  $input
#end for
@@ -24,7 +24,7 @@

<inputs>
<expand macro="input_sequences_filenames" />
- <expand macro="input_counting_table_filename" />
+ <expand macro="input_countgraph_filename" />
</inputs>
<outputs>

@@ -33,7 +33,7 @@
<!--        <tests>
                 <test>
                         <param name="inputs" value="test-abund-read-2.fa" />
-                        <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
+                        <param name="input_countgraph_filename" value="test-abund-read-2.ct" ftype="oxlicg" />
                         <output name="output">
<discover_dataset name="test-abund-read-2.fa.abundfilt">
</discover_dataset>
@@ -41,7 +41,7 @@
                 </test>
                 <test>
                         <param name="input_sequence_filename" value="test-abund-read-2.fa" />
-                        <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
+                        <param name="input_countgraph_filename" value="test-abund-read-2.ct" ftype="oxlicg" />
<param name="cutoff" value="1" />
                         <output name="output">
<discover_dataset name="test-abund-read-2.fa.abundfilt">

diff -r 48d7f2580fe5 -r 3d90a3a78c3b macros.xml
--- a/macros.xml Fri Sep 11 14:43:53 2015 -0400
+++ b/macros.xml Sat Sep 12 21:05:57 2015 -0400

[

@@ -85,12 +85,12 @@
                         format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
                         label="FAST[AQ] file(s)" />
</xml>
- <xml name="input_counting_table_filename">
-                <param  name="input_counting_table_filename"
+ <xml name="input_countgraph_filename">
+                <param  name="input_countgraph_filename"
type="data"
- format="ct"
-                        label="the k-mer counting table to query"
-                        help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." />
+ format="oxlicg"
+                        label="the k-mer countgraph to query"
+                        help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." />
</xml>
<xml name="abundance-histogram-output">
                 <data   name="output_histogram_filename"
@@ -121,6 +121,14 @@
                         checked="true"
                         help="Output zero count bins (--no-zero)" />
</xml>
+ <xml name="input_bigcount">
+                <param  name="bigcount"
+                        type="boolean"
+                        truevalue=""
+                        falsevalue="--no-bigcount"
+                        checked="true"
+                        help="Count k-mers past 255 occurrences (--no-bigcount)" />
+ </xml>
<xml name="software-citation">
<citation type="bibtex">@article{khmer2015,
      author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine

diff -r 48d7f2580fe5 -r 3d90a3a78c3b normalize-by-median.cwl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/normalize-by-median.cwl Sat Sep 12 21:05:57 2015 -0400

[

@@ -0,0 +1,72 @@
+#!/usr/bin/env cwl-runner
+- arguments:
+  - valueFrom: {engine: '#galaxy_command_line', script: " \nmkdir output;\ncd output;\n\
+        normalize-by-median.py\n$paired_switch\n@TABLEPARAMS@\n--cutoff=$cutoff\n\
+        #if $save_countingtable\n--savetable=$countingtable\n#end if\n#if $countingtable_to_load\n\
+        --loadtable=$countingtable_to_load\n#end if\n--report-total-kmers\n#for entry\
+        \ in $many_inputs\n#for input in $entry.inputs\n$input\n#end for\n#end for\n\
+        --out=$output\n"}
+  baseCommand: [/bin/sh, -c]
+  class: CommandLineTool
+  id: '#gedlab-khmer-normalize-by-median'
+  inputs:
+  - id: '#many_inputs'
+    type:
+      items:
+        fields:
+        - {label: 'FAST[AQ] file(s)', name: inputs, type: File}
+        name: many_inputs
+        type: record
+      type: array
+  - default: ''
+    id: '#paired_switch'
+    label: Are the inputs interleaved paired ends?
+    type:
+      name: paired_switch
+      symbols: ['', --paired]
+      type: enum
+  - {id: '#countingtable_to_load', label: an optional k-mer counting table to load,
+    type: File}
+  - {default: 'false', id: '#save_countingtable', label: Save the k-mer counting table(s)
+      in a file, type: boolean}
+  - {default: 20, id: '#cutoff', label: cutoff, type: int}
+  - id: '#parameters'
+    type:
+    - fields:
+      - label: Sample Type
+        name: tablesize
+        type:
+          name: tablesize
+          symbols: [1e9, 2e9, 4e9, 16e9]
+          type: enum
+      - name: type
+        type:
+          name: simple2
+          symbols: [simple]
+          type: enum
+      name: simple
+      type: record
+    - fields:
+      - {default: 20, label: ksize, name: ksize, type: int}
+      - {default: 4, label: n_tables, name: n_tables, type: int}
+      - {label: tablesize, name: tablesize_specific, type: string}
+      - name: type
+        type:
+          name: specific2
+          symbols: [specific]
+          type: enum
+      name: specific
+      type: record
+  - {default: countingtable, id: '#countingtable', type: string}
+  label: Normalize By Median
+  outputs:
+  - id: '#countingtable2'
+    outputBinding: {glob: countingtable}
+    type: File
+  requirements:
+  - {class: ExpressionEngineRequirement, engineCommand: ./galaxy-command-line.py,
+    id: '#galaxy_command_line'}
+  - {class: ExpressionEngineRequirement, engineCommand: ./galaxy-template.py, id: '#galaxy_template'}
+  - class: EnvVarRequirement
+    envDef:
+    - {envName: GALAXY_SLOTS, envValue: ''}

diff -r 48d7f2580fe5 -r 3d90a3a78c3b normalize-by-median.xml
--- a/normalize-by-median.xml Fri Sep 11 14:43:53 2015 -0400
+++ b/normalize-by-median.xml Sat Sep 12 21:05:57 2015 -0400

@@ -24,11 +24,11 @@
#if $unpaired_reads_filename
--unpaired-reads=$unpaired_reads_filename
#end if
-#if $save_countingtable
---savetable=$countingtable
+#if $save_countgraph
+--savegraph=$countgraph
#end if
-#if $countingtable_to_load
---loadtable=$countingtable_to_load
+#if $countgraph_to_load
+--loadgraph=$countgraph_to_load
#end if
--report-total-kmers
#for entry in $many_inputs
@@ -64,16 +64,16 @@
label="Extra unpaired reads."
help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing."
/>
- <param name="countingtable_to_load"
+ <param name="countgraph_to_load"
type="data"
- format="ct"
+ format="oxlicg"
optional="true"
- label="an optional k-mer counting table to load"
- help="(--loadtable) The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." />
- <param name="save_countingtable"
+ label="an optional k-mer countgraph to load"
+ help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
+ <param name="save_countgraph"
type="boolean"
- label="Save the k-mer counting table(s) in a file"
- help="(--savetable)" />
+ label="Save the k-mer countgraph(s) in a file"
+ help="(--savegraph)" />
<param name="cutoff"
type="integer"
min="1"
@@ -83,10 +83,10 @@
<expand macro="tableinputs" />
</inputs>
<outputs>
- <data name="countingtable"
- format="ct"
- label="${tool.name} k-mer counting table">
- <filter>save_countingtable == True</filter>
+ <data name="countgraph"
+ format="oxlicg"
+ label="${tool.name} k-mer countgraph">
+ <filter>save_countgraph == True</filter>
</data>

<expand macro="output_sequences_single" />
@@ -154,9 +154,9 @@
individually.

With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
-file after all sequences have been processed. `--loadtable` will load the
-specified k-mer counting table before processing the specified files. Note
-that these tables are are in the same format as those produced by
+file after all sequences have been processed. `--loadgraph` will load the
+specified k-mer countgraph before processing the specified files. Note
+that the countgraph is in the same format as those produced by
`load-into-counting.py` and consumed by `abundance-dist.py`.
]]>
</help>

diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.ct

Binary file test-data/test-abund-read-2.ct has changed

diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.ct.info
--- a/test-data/test-abund-read-2.ct.info Fri Sep 11 14:43:53 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,3 +0,0 @@
-through test-data/test-abund-read-2.fa
-fp rate estimated to be 0.000
-

diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.nobigcount.ct

Binary file test-data/test-abund-read-2.nobigcount.ct has changed

diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.nobigcount.ct.info
--- a/test-data/test-abund-read-2.nobigcount.ct.info Fri Sep 11 14:43:53 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,3 +0,0 @@
-through test-data/test-abund-read-2.fa
-fp rate estimated to be 0.000
-

diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.oxlicg

Binary file test-data/test-abund-read-2.oxlicg has changed

diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.oxlicg.info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.oxlicg.info Sat Sep 12 21:05:57 2015 -0400

@@ -0,0 +1,4 @@
+through /home/mcrusoe/khmer/tests/test-data/test-abund-read-2.fa
+Total number of unique k-mers: 98
+fp rate estimated to be 0.000
+