| Previous changeset 13:48d7f2580fe5 (2015-09-11) Next changeset 15:2c4635f5be47 (2015-09-18) |
|
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit ba6a4d1acd2df42baf4a012bdaac4f6c6051b8dc |
|
modified:
abundance-dist-single.xml abundance-dist.xml count-median.xml filter-abund.xml filter-below-abund.xml macros.xml normalize-by-median.xml |
|
added:
normalize-by-median.cwl test-data/test-abund-read-2.oxlicg test-data/test-abund-read-2.oxlicg.info |
|
removed:
abundance-dist-single.norm.xml filter-abund.norm.xml test-data/test-abund-read-2.ct test-data/test-abund-read-2.ct.info test-data/test-abund-read-2.nobigcount.ct test-data/test-abund-read-2.nobigcount.ct.info |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b abundance-dist-single.norm.xml --- a/abundance-dist-single.norm.xml Fri Sep 11 14:43:53 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| b'@@ -1,297 +0,0 @@\n-<tool id="gedlab-khmer-abundance-dist-single" name="Abundance Distribution (all-in-one)" version="2.0-3">\n-\n-\t<description>\n-\t\tCalculate abundance distribution of the k-mers in a given\n-\t\tsequence file.\n-\t</description>\n-\t<macros>\n-\t\t<macro name="requirements" type="xml">\n-\t\t<requirements>\n-\t\t\t<requirement type="package" version="2.0">khmer</requirement>\n-\t\t</requirements>\n-\t</macro>\n-\t<macro name="version" type="xml">\n-\t\t<version_command>abundance-dist-single.py --version</version_command>\n-\t</macro>\n-\t<macro name="tableinputs" type="xml">\n-\t\t<conditional name="parameters">\n-\t\t\t<param help="ksize, n_tables, a specific tablesize" label="Advanced Parameters" name="type" type="select">\n-\t\t\t\t<option selected="true" value="simple">\n-\t\t\t\t\tHide\n-\t\t\t\t</option>\n-\t\t\t\t<option value="specific">\n-\t\t\t\t\tShow\n-\t\t\t\t</option>\n- \t\t</param>\n-\t\t\t<when value="simple">\n-\t\t\t\t<param display="radio" label="Sample Type" name="tablesize" type="select">\n-\t\t\t\t\t<option selected="true" value="1e9">\n-\t\t\t\t\t\tMicrobial Genome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="2e9">\n-\t\t\t\t\t\tAnimal Transcriptome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="4e9">\n-\t\t\t\t\t\tSmall Animal Genome or\n-\t\t\t\t\t\tLow-Diversity Metagenome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="16e9">\n-\t\t\t\t\t\tLarge Animal Genome\n-\t\t\t\t\t</option>\n-\t\t\t\t</param>\n-\t\t\t</when>\n-\t\t\t<when value="specific">\n-\t\t\t\t<param help="k-mer size to use" label="ksize" name="ksize" type="integer" value="20" />\n-\t\t\t\t<param help="number of tables to use" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n-\t\t\t\t<param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n-\t\t\t</when>\n-\t\t</conditional>\n-\t</macro>\n-\t<macro name="input_sequences_filenames" type="xml">\n- <param 
format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" help="Put in order of precedence such as longest reads first." label="FAST[AQ] file(s)" multiple="true" name="inputs" type="data" />\n-\t</macro>\n-\t<macro name="input_sequence_filename" type="xml">\n- <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="FAST[AQ] file(s)" name="input_sequence_filename" type="data" />\n-\t</macro>\n-\t<macro name="input_counting_table_filename" type="xml">\n- <param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n-\t</macro>\n-\t<macro name="abundance-histogram-output" type="xml">\n- <data format="txt" label="${tool.name} k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers." name="output_histogram_filename">\n- </data>\n-\t</macro>\n-\t<macro name="output_sequences" type="xml">\n- <data format_source="inputs" label="${tool.name} processed nucleotide sequence file" name="output">\n- <discover_datasets directory="output" pattern="__name__" visible="true" />\n- </data>\n-\t</macro>\n-\t<macro name="output_sequences_single" type="xml">\n- <data format_source="input_sequence_filename" label="${tool.name} processed nucleotide sequence file" name="output" />\n-\t</macro>\n-\t<macro name="input_zero" type="xml">\n- <param checked="true" falsevalue="--no-zero" help="Output zero count bins (--no-zero)" name="zero" truevalue="" type="boolean" />\n-\t</macro>\n-\t<macro name="software-citation" type="xml">\n-\t\t<citation type="bibtex">@article{khmer2015,\n- author = "Crusoe, Michael R. and Alameldin, Hussien F. 
and Awad, Sherine\n- and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n- Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n- Jacob and Fenzl, T'..b'lp="number of tables to use" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n-\t\t\t\t<param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n-\t\t\t</when>\n-\t\t</conditional>\n-\t</inputs>\n-\t<outputs>\n-\t\t<data format="ct" label="${tool.name} k-mer counting table" name="optional_output_countingtable">\n-\t\t\t<filter>save_countingtable == True</filter>\n-\t\t</data>\n-\t\t<data format="txt" label="${tool.name} k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers." name="output_histogram_filename">\n- </data>\n-\t</outputs>\n- <tests>\n-\t <test>\n-\t\t <param name="input_sequence_filename" value="test-abund-read-2.fa" />\n-\t\t <param name="type" value="specific" />\n-\t\t <param name="tablesize_specific" value="1e7" />\n-\t\t <param name="n_tables" value="2" />\n-\t\t <param name="ksize" value="17" />\n-\t\t <param name="no_zero" value="false" />\n-\t\t <output name="output_histogram_filename">\n-\t\t\t <assert_contents>\n-\t\t\t\t <has_text text="1,96,96,0.98" />\n-\t\t\t\t <has_text text="1001,2,98,1.0" />\n-\t\t\t </assert_contents>\n-\t\t </output>\n-\t </test>\n-\t <test>\n-\t\t <param name="input_sequence_filename" value="test-abund-read-2.fa" />\n-\t\t <param name="type" value="specific" />\n-\t\t <param name="tablesize_specific" value="1e7" />\n-\t\t <param name="n_tables" value="2" />\n-\t\t <param name="ksize" value="17" />\n-\t\t <param name="no_zero" value="false" />\n-\t\t <param name="bigcount" value="false" />\n-\t\t <output name="output_histogram_filename">\n-\t\t\t <assert_contents>\n-\t\t\t\t <has_text text="1,96,96,0.98" />\n-\t\t\t\t <has_text text="255,2,98,1.0" />\n-\t\t\t 
</assert_contents>\n-\t\t </output>\n-\t </test>\n-\n- </tests>\n- <help>\n-Calculate the abundance distribution of k-mers from a single sequence file.\n-\n-Note that with `-b` this script is constant memory; in exchange,\n-k-mer counts will stop at 255. The memory usage of this script with\n-`-b` will be about 1.15x the product of the `-x` and\n-`-N` numbers.\n-\n-To count k-mers in multiple files use `load_into_counting.py` and\n-`abundance_dist.py`.\n-\n- </help>\n- <citations>\n-\t<citation type="bibtex">@article{khmer2015,\n- author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine\n- and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n- Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n- Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and\n- Garland, Phillip and Gluck, Jonathan and González, Iván and Guermond, Sarah\n- and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and\n- Hyer, Alex and Härpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David\n- and Lippi, Justin and Mansour, Tamer and McA\'Nulty, Pamela and McDonald, Eric\n- and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. and Nanlohy,\n- Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory,\n- Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and\n- Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and\n- Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan,\n- Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and\n- Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather\n- L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and\n- Brown, C. 
Titus"\n- title = "The khmer software package: enabling efficient nucleotide\n- sequence analysis",\n- year = "2015",\n- month = "08",\n- publisher = "F1000",\n- url = "http://dx.doi.org/10.12688/f1000research.6924.1"\n- }</citation>\n-\t<citation type="doi">10.1371/journal.pone.0101271</citation>\n-\t</citations>\n-</tool>\n' |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b abundance-dist-single.xml --- a/abundance-dist-single.xml Fri Sep 11 14:43:53 2015 -0400 +++ b/abundance-dist-single.xml Sat Sep 12 21:05:57 2015 -0400 |
| b |
| @@ -1,6 +1,6 @@ <tool id="gedlab-khmer-abundance-dist-single" name="Abundance Distribution (all-in-one)" - version="2.0-3"> + version="2.0-4"> <description> Calculate abundance distribution of the k-mers in a given @@ -21,8 +21,8 @@ @TABLEPARAMS@ $zero $bigcount -#if $save_countingtable ---savetable=$optional_output_countingtable +#if $save_countgraph +--savegraph=$optional_output_countgraph #end if --squash @THREADS@ @@ -33,24 +33,19 @@ <inputs> <expand macro="input_sequence_filename" /> - <param name="save_countingtable" + <param name="save_countgraph" type="boolean" - label="Save the k-mer counting table(s) in a file" - help="(--savetable)" /> + label="Save the k-mer countgraph to a file" + help="(--savegraph)" /> <expand macro="input_zero" /> - <param name="bigcount" - type="boolean" - truevalue="" - falsevalue="--no-bigcount" - checked="true" - help="Count k-mers past 255 (--no-bigcount)" /> + <expand macro="input_bigcount" /> <expand macro="tableinputs" /> </inputs> <outputs> - <data name="optional_output_countingtable" - format="ct" - label="${tool.name} k-mer counting table"> - <filter>save_countingtable == True</filter> + <data name="optional_output_countgraph" + format="oxlicg" + label="${tool.name} k-mer countgraph"> + <filter>save_countgraph == True</filter> </data> <expand macro="abundance-histogram-output" /> </outputs> |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b abundance-dist.xml --- a/abundance-dist.xml Fri Sep 11 14:43:53 2015 -0400 +++ b/abundance-dist.xml Sat Sep 12 21:05:57 2015 -0400 |
| [ |
| @@ -1,10 +1,10 @@ <tool id="gedlab-khmer-abundance-dist" name="Abundance Distribution" - version="2.0-2"> + version="2.0-3"> <description> Calculate abundance distribution of the k-mers in a given sequence - file using a pre-made k-mer counting table. + file using a pre-made k-mer countgraph. </description> <macros> <token name="@BINARY@">abundance-dist.py</token> @@ -19,16 +19,19 @@ mkdir output; cd output; @BINARY@ --squash -$input_counting_table_filename +$zero +$bigcount +$input_countgraph_filename $input_sequence_filename $output_histogram_filename ]]> </command> <inputs> - <expand macro="input_counting_table_filename" /> + <expand macro="input_countgraph_filename" /> <expand macro="input_sequence_filename" /> <expand macro="input_zero" /> + <expand macro="input_bigcount" /> </inputs> <outputs> <expand macro="abundance-histogram-output" /> @@ -36,33 +39,31 @@ <tests> <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> - <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> - <param name="no_zero" value="false" /> + <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" /> + <param name="zero" value="false" /> <output name="output_histogram_filename"> <assert_contents> - <has_line_matching expression="1,96,96,0.98" /> - <has_line_matching expression="1001,2,98,1.0" /> + <has_text text="1,96,96,0.98" /> + <has_text text="1001,2,98,1.0" /> </assert_contents> </output> </test> <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> - <param name="input_counting_table_filename" value="test-abund-read-2.nobigcount.ct" ftype="ct" /> - <param name="no_zero" value="false" /> - <assert_stderr> - <has_line_matching expression="WARNING: The loaded graph has bigcount" /> - </assert_stderr> + <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" /> + <param name="zero" value="false" /> + <param name="bigcount" value="false" /> 
<output name="output_histogram_filename"> <assert_contents> - <has_line_matching expression="1,96,96,0.98" /> - <has_line_matching expression="255,2,98,1.0" /> + <has_text text="1,96,96,0.98" /> + <has_text text="255,2,98,1.0" /> </assert_contents> </output> </test> </tests> <help><![CDATA[ Calculate abundance distribution of the k-mers in the sequence file using a -pre-made k-mer counting table. +pre-made k-mer countgraph. ]]> </help> <citations> |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b count-median.xml --- a/count-median.xml Fri Sep 11 14:43:53 2015 -0400 +++ b/count-median.xml Sat Sep 12 21:05:57 2015 -0400 |
| [ |
| @@ -5,7 +5,7 @@ <description> Count the median/avg k-mer abundance for each sequence in the input file, based on the k-mer counts in the given k-mer - counting table. Can be used to estimate expression levels + countgraph. Can be used to estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). </description> <macros> @@ -17,7 +17,7 @@ <expand macro="version" /> <command><![CDATA[ @BINARY@ -$input_counting_table_filename +$input_countgraph_filename $input_sequence_filename $output_summary_filename ]]> @@ -25,7 +25,7 @@ <inputs> <expand macro="input_sequence_filename" /> - <expand macro="input_counting_table_filename" /> + <expand macro="input_countgraph_filename" /> </inputs> <outputs> <data name="output_summary_filename" format="txt" @@ -35,21 +35,19 @@ <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> - <param name="input_counting_table_filename" - value="test-abund-read-2.ct" ftype="ct" /> + <param name="input_countgraph_filename" + value="test-abund-read-2.oxlicg" ftype="oxlicg" /> <output name="output_summary_filename"> <assert_contents> - <has_line_matching - expression="seq 1001 1001.0 0.0 18" /> - <has_line_matching - expression="895:1:37:17593:9954/1 1 103.803741455 303.702941895 114" /> + <has_text text="seq,1001,1001.0,0.0,18" /> + <has_text text="895:1:37:17593:9954/1,1,21.408163071,141.391921997,114" /> </assert_contents> </output> </test> </tests> <help> Count the median/avg k-mer abundance for each sequence in the input file, -based on the k-mer counts in the given k-mer counting table. Can be used to +based on the k-mer counts in the given k-mer countgraph. Can be used to estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The output file contains sequence id, median, average, stddev, and seq length; fields are separated by spaces. For khmer 1.x count-median.py will split |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b filter-abund.norm.xml --- a/filter-abund.norm.xml Fri Sep 11 14:43:53 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| b'@@ -1,241 +0,0 @@\n-<tool id="gedlab-khmer-filter-abund" name="Filter by abundance" version="2.0-3">\n-\n-\t<description>\n-\t\tTrims fastq/fasta sequences at k-mers of a given abundance\n-\t\tbased on a provided k-mer counting table.\n-\t</description>\n-\t<macros>\n-\t\t<macro name="requirements" type="xml">\n-\t\t<requirements>\n-\t\t\t<requirement type="package" version="2.0">khmer</requirement>\n-\t\t</requirements>\n-\t</macro>\n-\t<macro name="version" type="xml">\n-\t\t<version_command>filter-abund.py --version</version_command>\n-\t</macro>\n-\t<macro name="tableinputs" type="xml">\n-\t\t<conditional name="parameters">\n-\t\t\t<param help="ksize, n_tables, a specific tablesize" label="Advanced Parameters" name="type" type="select">\n-\t\t\t\t<option selected="true" value="simple">\n-\t\t\t\t\tHide\n-\t\t\t\t</option>\n-\t\t\t\t<option value="specific">\n-\t\t\t\t\tShow\n-\t\t\t\t</option>\n- \t\t</param>\n-\t\t\t<when value="simple">\n-\t\t\t\t<param display="radio" label="Sample Type" name="tablesize" type="select">\n-\t\t\t\t\t<option selected="true" value="1e9">\n-\t\t\t\t\t\tMicrobial Genome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="2e9">\n-\t\t\t\t\t\tAnimal Transcriptome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="4e9">\n-\t\t\t\t\t\tSmall Animal Genome or\n-\t\t\t\t\t\tLow-Diversity Metagenome\n-\t\t\t\t\t</option>\n-\t\t\t\t\t<option value="16e9">\n-\t\t\t\t\t\tLarge Animal Genome\n-\t\t\t\t\t</option>\n-\t\t\t\t</param>\n-\t\t\t</when>\n-\t\t\t<when value="specific">\n-\t\t\t\t<param help="k-mer size to use" label="ksize" name="ksize" type="integer" value="20" />\n-\t\t\t\t<param help="number of tables to use" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n-\t\t\t\t<param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n-\t\t\t</when>\n-\t\t</conditional>\n-\t</macro>\n-\t<macro name="input_sequences_filenames" type="xml">\n- <param 
format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" help="Put in order of precedence such as longest reads first." label="FAST[AQ] file(s)" multiple="true" name="inputs" type="data" />\n-\t</macro>\n-\t<macro name="input_sequence_filename" type="xml">\n- <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="FAST[AQ] file(s)" name="input_sequence_filename" type="data" />\n-\t</macro>\n-\t<macro name="input_counting_table_filename" type="xml">\n- <param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n-\t</macro>\n-\t<macro name="abundance-histogram-output" type="xml">\n- <data format="txt" label="${tool.name} k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers." name="output_histogram_filename">\n- </data>\n-\t</macro>\n-\t<macro name="output_sequences" type="xml">\n- <data format_source="inputs" label="${tool.name} processed nucleotide sequence file" name="output">\n- <discover_datasets directory="output" pattern="__name__" visible="true" />\n- </data>\n-\t</macro>\n-\t<macro name="output_sequences_single" type="xml">\n- <data format_source="input_sequence_filename" label="${tool.name} processed nucleotide sequence file" name="output" />\n-\t</macro>\n-\t<macro name="input_zero" type="xml">\n- <param checked="true" falsevalue="--no-zero" help="Output zero count bins (--no-zero)" name="zero" truevalue="" type="boolean" />\n-\t</macro>\n-\t<macro name="software-citation" type="xml">\n-\t\t<citation type="bibtex">@article{khmer2015,\n- author = "Crusoe, Michael R. and Alameldin, Hussien F. 
and Awad, Sherine\n- and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n- Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n- Jacob and Fenzl, Thomas '..b'sequence has high enough coverage; median abundance > 20 (--variable_coverage)" label="Variable coverage" name="variable_coverage" truevalue="--variable-coverage" type="boolean" />\n-\t\t<param help="Trim at k-mers below this abundance. (--cutoff)" label="cutoff" name="cutoff" type="integer" value="2" />\n-\t\t<param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n-\t</inputs>\n-\t<outputs>\n-\t\t<data format_source="inputs" label="${tool.name} processed nucleotide sequence file" name="output">\n- <discover_datasets directory="output" pattern="__name__" visible="true" />\n- </data>\n-\t</outputs>\n-\t<tests>\n- <test>\n- <param name="inputs" value="test-abund-read-2.fa" />\n-\t\t\t<param ftype="ct" name="input_counting_table_filename" value="test-abund-read-2.ct" />\n- <output name="output">\n-\t\t\t\t<discover_dataset name="test-abund-read-2.fa.abundfilt"> \n- \t<assert_contents>\n- \t<has_text text="GGTTGACGGGGCTCAGGG" />\n- \t</assert_contents>\n-\t\t\t\t</discover_dataset>\n- </output>\n- </test>\n- <test>\n-\t\t\t<param name="inputs" value="test-abund-read-2.fa" />\n-\t\t\t<param ftype="ct" name="input_counting_table_filename" value="test-abund-read-2.ct" />\n-\t\t\t<param name="cutoff" value="1" />\n- <output name="output">\n-\t\t\t\t<discover_dataset name="test-abund-read-2.fa.abundfilt">\n- \t<assert_contents>\n- \t<has_text text="GGTTGACGGGGCTCAGGG" />\n- \t</assert_contents>\n-\t\t\t\t</discover_dataset>\n- </output>\n- </test>\n- </tests>\n-\t<help>\n-Trim sequences at a minimum k-mer abundance.\t\t\n-\t\t\n-Trimmed sequences will be placed in 
${input_sequence_filename}.abundfilt\n-for each input sequence file. If the input sequences are from RNAseq or\n-metagenome sequencing then `--variable-coverage` should be used.\n-\n-\t</help>\n-\t<citations>\n-\t\t<citation type="bibtex">@article{khmer2015,\n- author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine\n- and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n- Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n- Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and\n- Garland, Phillip and Gluck, Jonathan and González, Iván and Guermond, Sarah\n- and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and\n- Hyer, Alex and Härpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David\n- and Lippi, Justin and Mansour, Tamer and McA\'Nulty, Pamela and McDonald, Eric\n- and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. and Nanlohy,\n- Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory,\n- Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and\n- Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and\n- Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan,\n- Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and\n- Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather\n- L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and\n- Brown, C. Titus"\n- title = "The khmer software package: enabling efficient nucleotide\n- sequence analysis",\n- year = "2015",\n- month = "08",\n- publisher = "F1000",\n- url = "http://dx.doi.org/10.12688/f1000research.6924.1"\n- }</citation>\n-\t<citation type="doi">10.1371/journal.pone.0101271</citation>\n-\t</citations>\n-</tool>\n' |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b filter-abund.xml --- a/filter-abund.xml Fri Sep 11 14:43:53 2015 -0400 +++ b/filter-abund.xml Sat Sep 12 21:05:57 2015 -0400 |
| b |
| @@ -4,7 +4,7 @@ <description> Trims fastq/fasta sequences at k-mers of a given abundance - based on a provided k-mer counting table. + based on a provided k-mer countgraph. </description> <macros> <token name="@BINARY@">filter-abund.py</token> @@ -21,7 +21,7 @@ #end if $variable_coverage @THREADS@ -$input_counting_table_filename +$input_countgraph_filename #for input in $inputs $input #end for @@ -42,7 +42,7 @@ value="2" label="cutoff" help="Trim at k-mers below this abundance. (--cutoff)" /> - <expand macro="input_counting_table_filename" /> + <expand macro="input_countgraph_filename" /> </inputs> <outputs> <expand macro="output_sequences" /> @@ -50,8 +50,8 @@ <tests> <test> <param name="inputs" value="test-abund-read-2.fa" /> - <param name="input_counting_table_filename" - value="test-abund-read-2.ct" ftype="ct" /> + <param name="input_countgraph_filename" + value="test-abund-read-2.oxlicg" ftype="oxlicg" /> <output name="output"> <discover_dataset name="test-abund-read-2.fa.abundfilt"> <assert_contents> @@ -62,8 +62,8 @@ </test> <test> <param name="inputs" value="test-abund-read-2.fa" /> - <param name="input_counting_table_filename" - value="test-abund-read-2.ct" ftype="ct" /> + <param name="input_countgraph_filename" + value="test-abund-read-2.oxlicg" ftype="oxlicg" /> <param name="cutoff" value="1" /> <output name="output"> <discover_dataset name="test-abund-read-2.fa.abundfilt"> |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b filter-below-abund.xml --- a/filter-below-abund.xml Fri Sep 11 14:43:53 2015 -0400 +++ b/filter-below-abund.xml Sat Sep 12 21:05:57 2015 -0400 |
| b |
| @@ -4,7 +4,7 @@ <description> Trims fastq/fasta sequences at k-mers with abundance below 50 - based on a provided k-mer counting table. + based on a provided k-mer countgraph. </description> <macros> <token name="@BINARY@">filter-below-abund.py</token> @@ -16,7 +16,7 @@ <command> mkdir output; cd output; @BINARY@ -$input_counting_table_filename +$input_countgraph_filename #for input in $inputs $input #end for @@ -24,7 +24,7 @@ <inputs> <expand macro="input_sequences_filenames" /> - <expand macro="input_counting_table_filename" /> + <expand macro="input_countgraph_filename" /> </inputs> <outputs> <!-- <expand macro="output_sequences" /> --> @@ -33,7 +33,7 @@ <!-- <tests> <test> <param name="inputs" value="test-abund-read-2.fa" /> - <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> + <param name="input_countgraph_filename" value="test-abund-read-2.ct" ftype="oxlicg" /> <output name="output"> <discover_dataset name="test-abund-read-2.fa.abundfilt"> </discover_dataset> @@ -41,7 +41,7 @@ </test> <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> - <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> + <param name="input_countgraph_filename" value="test-abund-read-2.ct" ftype="oxlicg" /> <param name="cutoff" value="1" /> <output name="output"> <discover_dataset name="test-abund-read-2.fa.abundfilt"> |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b macros.xml --- a/macros.xml Fri Sep 11 14:43:53 2015 -0400 +++ b/macros.xml Sat Sep 12 21:05:57 2015 -0400 |
| [ |
| @@ -85,12 +85,12 @@ format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="FAST[AQ] file(s)" /> </xml> - <xml name="input_counting_table_filename"> - <param name="input_counting_table_filename" + <xml name="input_countgraph_filename"> + <param name="input_countgraph_filename" type="data" - format="ct" - label="the k-mer counting table to query" - help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." /> + format="oxlicg" + label="the k-mer countgraph to query" + help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." /> </xml> <xml name="abundance-histogram-output"> <data name="output_histogram_filename" @@ -121,6 +121,14 @@ checked="true" help="Output zero count bins (--no-zero)" /> </xml> + <xml name="input_bigcount"> + <param name="bigcount" + type="boolean" + truevalue="" + falsevalue="--no-bigcount" + checked="true" + help="Count k-mers past 255 occurrences (--no-bigcount)" /> + </xml> <xml name="software-citation"> <citation type="bibtex">@article{khmer2015, author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b normalize-by-median.cwl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/normalize-by-median.cwl Sat Sep 12 21:05:57 2015 -0400 |
| [ |
| @@ -0,0 +1,72 @@ +#!/usr/bin/env cwl-runner +- arguments: + - valueFrom: {engine: '#galaxy_command_line', script: " \nmkdir output;\ncd output;\n\ + normalize-by-median.py\n$paired_switch\n@TABLEPARAMS@\n--cutoff=$cutoff\n\ + #if $save_countingtable\n--savetable=$countingtable\n#end if\n#if $countingtable_to_load\n\ + --loadtable=$countingtable_to_load\n#end if\n--report-total-kmers\n#for entry\ + \ in $many_inputs\n#for input in $entry.inputs\n$input\n#end for\n#end for\n\ + --out=$output\n"} + baseCommand: [/bin/sh, -c] + class: CommandLineTool + id: '#gedlab-khmer-normalize-by-median' + inputs: + - id: '#many_inputs' + type: + items: + fields: + - {label: 'FAST[AQ] file(s)', name: inputs, type: File} + name: many_inputs + type: record + type: array + - default: '' + id: '#paired_switch' + label: Are the inputs interleaved paired ends? + type: + name: paired_switch + symbols: ['', --paired] + type: enum + - {id: '#countingtable_to_load', label: an optional k-mer counting table to load, + type: File} + - {default: 'false', id: '#save_countingtable', label: Save the k-mer counting table(s) + in a file, type: boolean} + - {default: 20, id: '#cutoff', label: cutoff, type: int} + - id: '#parameters' + type: + - fields: + - label: Sample Type + name: tablesize + type: + name: tablesize + symbols: [1e9, 2e9, 4e9, 16e9] + type: enum + - name: type + type: + name: simple2 + symbols: [simple] + type: enum + name: simple + type: record + - fields: + - {default: 20, label: ksize, name: ksize, type: int} + - {default: 4, label: n_tables, name: n_tables, type: int} + - {label: tablesize, name: tablesize_specific, type: string} + - name: type + type: + name: specific2 + symbols: [specific] + type: enum + name: specific + type: record + - {default: countingtable, id: '#countingtable', type: string} + label: Normalize By Median + outputs: + - id: '#countingtable2' + outputBinding: {glob: countingtable} + type: File + requirements: + - {class: ExpressionEngineRequirement, 
engineCommand: ./galaxy-command-line.py, + id: '#galaxy_command_line'} + - {class: ExpressionEngineRequirement, engineCommand: ./galaxy-template.py, id: '#galaxy_template'} + - class: EnvVarRequirement + envDef: + - {envName: GALAXY_SLOTS, envValue: ''} |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b normalize-by-median.xml --- a/normalize-by-median.xml Fri Sep 11 14:43:53 2015 -0400 +++ b/normalize-by-median.xml Sat Sep 12 21:05:57 2015 -0400 |
| b |
| @@ -24,11 +24,11 @@ #if $unpaired_reads_filename --unpaired-reads=$unpaired_reads_filename #end if -#if $save_countingtable ---savetable=$countingtable +#if $save_countgraph +--savegraph=$countgraph #end if -#if $countingtable_to_load ---loadtable=$countingtable_to_load +#if $countgraph_to_load +--loadgraph=$countgraph_to_load #end if --report-total-kmers #for entry in $many_inputs @@ -64,16 +64,16 @@ label="Extra unpaired reads." help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." /> - <param name="countingtable_to_load" + <param name="countgraph_to_load" type="data" - format="ct" + format="oxlicg" optional="true" - label="an optional k-mer counting table to load" - help="(--loadtable) The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." /> - <param name="save_countingtable" + label="an optional k-mer countgraph to load" + help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." /> + <param name="save_countgraph" type="boolean" - label="Save the k-mer counting table(s) in a file" - help="(--savetable)" /> + label="Save the k-mer countgraph(s) in a file" + help="(--savegraph)" /> <param name="cutoff" type="integer" min="1" @@ -83,10 +83,10 @@ <expand macro="tableinputs" /> </inputs> <outputs> - <data name="countingtable" - format="ct" - label="${tool.name} k-mer counting table"> - <filter>save_countingtable == True</filter> + <data name="countgraph" + format="oxlicg" + label="${tool.name} k-mer countgraph"> + <filter>save_countgraph == True</filter> </data> <!-- <expand macro="output_sequences" /> --> <expand macro="output_sequences_single" /> @@ -154,9 +154,9 @@ individually. 
With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified -file after all sequences have been processed. `--loadtable` will load the -specified k-mer counting table before processing the specified files. Note -that these tables are are in the same format as those produced by +file after all sequences have been processed. `--loadgraph` will load the +specified k-mer countgraph before processing the specified files. Note +that the countgraph is in the same format as those produced by `load-into-counting.py` and consumed by `abundance-dist.py`. ]]> </help> |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.ct |
| b |
| Binary file test-data/test-abund-read-2.ct has changed |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.ct.info --- a/test-data/test-abund-read-2.ct.info Fri Sep 11 14:43:53 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,3 +0,0 @@ -through test-data/test-abund-read-2.fa -fp rate estimated to be 0.000 - |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.nobigcount.ct |
| b |
| Binary file test-data/test-abund-read-2.nobigcount.ct has changed |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.nobigcount.ct.info --- a/test-data/test-abund-read-2.nobigcount.ct.info Fri Sep 11 14:43:53 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,3 +0,0 @@ -through test-data/test-abund-read-2.fa -fp rate estimated to be 0.000 - |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.oxlicg |
| b |
| Binary file test-data/test-abund-read-2.oxlicg has changed |
| b |
| diff -r 48d7f2580fe5 -r 3d90a3a78c3b test-data/test-abund-read-2.oxlicg.info --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-2.oxlicg.info Sat Sep 12 21:05:57 2015 -0400 |
| b |
| @@ -0,0 +1,4 @@ +through /home/mcrusoe/khmer/tests/test-data/test-abund-read-2.fa +Total number of unique k-mers: 98 +fp rate estimated to be 0.000 + |