Repository 'khmer'
hg clone https://toolshed.g2.bx.psu.edu/repos/crusoe/khmer

Changeset 13:48d7f2580fe5 (2015-09-11)
Previous changeset 12:f458fd505ec6 (2015-09-11) Next changeset 14:3d90a3a78c3b (2015-09-12)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit a84f63d100e17821dca8d9570db265cfcd478aad
modified:
abundance-dist-single.norm.xml
filter-abund.xml
added:
filter-abund.norm.xml
b
diff -r f458fd505ec6 -r 48d7f2580fe5 abundance-dist-single.norm.xml
--- a/abundance-dist-single.norm.xml Fri Sep 11 14:14:29 2015 -0400
+++ b/abundance-dist-single.norm.xml Fri Sep 11 14:43:53 2015 -0400
[
b'@@ -1,77 +1,78 @@\n <tool id="gedlab-khmer-abundance-dist-single" name="Abundance Distribution (all-in-one)" version="2.0-3">\n-    <description>\n+\n+\t<description>\n \t\tCalculate abundance distribution of the k-mers in a given\n \t\tsequence file.\n \t</description>\n-    <macros>\n-        <macro name="requirements" type="xml">\n-            <requirements>\n-                <requirement type="package" version="2.0">khmer</requirement>\n-            </requirements>\n-        </macro>\n-        <macro name="version" type="xml">\n-            <version_command>abundance-dist-single.py --version</version_command>\n-        </macro>\n-        <macro name="tableinputs" type="xml">\n-            <conditional name="parameters">\n-                <param help="ksize, n_tables, a specific tablesize" label="Advanced Parameters" name="type" type="select">\n-                    <option selected="true" value="simple">\n+\t<macros>\n+\t\t<macro name="requirements" type="xml">\n+\t\t<requirements>\n+\t\t\t<requirement type="package" version="2.0">khmer</requirement>\n+\t\t</requirements>\n+\t</macro>\n+\t<macro name="version" type="xml">\n+\t\t<version_command>abundance-dist-single.py --version</version_command>\n+\t</macro>\n+\t<macro name="tableinputs" type="xml">\n+\t\t<conditional name="parameters">\n+\t\t\t<param help="ksize, n_tables, a specific tablesize" label="Advanced Parameters" name="type" type="select">\n+\t\t\t\t<option selected="true" value="simple">\n \t\t\t\t\tHide\n \t\t\t\t</option>\n-                    <option value="specific">\n+\t\t\t\t<option value="specific">\n \t\t\t\t\tShow\n \t\t\t\t</option>\n-                </param>\n-                <when value="simple">\n-                    <param display="radio" label="Sample Type" name="tablesize" type="select">\n-                        <option selected="true" value="1e9">\n+            \t\t</param>\n+\t\t\t<when value="simple">\n+\t\t\t\t<param display="radio" label="Sample Type" name="tablesize" type="select">\n+\t\t\t\t\t<option selected="true" value="1e9">\n \t\t\t\t\t\tMicrobial Genome\n \t\t\t\t\t</option>\n-                        <option value="2e9">\n+\t\t\t\t\t<option value="2e9">\n \t\t\t\t\t\tAnimal Transcriptome\n \t\t\t\t\t</option>\n-                        <option value="4e9">\n+\t\t\t\t\t<option value="4e9">\n \t\t\t\t\t\tSmall Animal Genome or\n \t\t\t\t\t\tLow-Diversity Metagenome\n \t\t\t\t\t</option>\n-                        <option value="16e9">\n+\t\t\t\t\t<option value="16e9">\n \t\t\t\t\t\tLarge Animal Genome\n \t\t\t\t\t</option>\n-                    </param>\n-                </when>\n-                <when value="specific">\n-                    <param help="k-mer size to use" label="ksize" name="ksize" type="integer" value="20" />\n-                    <param help="number of tables to use" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n-                    <param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n-                </when>\n-            </conditional>\n-        </macro>\n-        <macro name="input_sequences_filenames" type="xml">\n-            <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" help="Put in order of precedence such as longest reads first." label="FAST[AQ] file(s)" multiple="true" name="inputs" type="data" />\n-        </macro>\n-        <macro name="input_sequence_filename" type="xml">\n-            <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="FAST[AQ] file(s)" name="input_sequence_filename" type="data" />\n-        </macro>\n-        <macro name="input_counting_table_filename" type="xml">\n-            <param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n-        </macro>\n-        <macro name="abundance-histogram-output" type="xml">\n-            <data format="txt" label="${tool.name} k-mer abundance histogram. The                         columns are: (1) k-mer abundance, (2) k-mer count, (3)     '..b'e" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n+\t\t\t\t<param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n+\t\t\t</when>\n+\t\t</conditional>\n+\t</inputs>\n+\t<outputs>\n+\t\t<data format="ct" label="${tool.name} k-mer counting table" name="optional_output_countingtable">\n+\t\t\t<filter>save_countingtable == True</filter>\n+\t\t</data>\n+\t\t<data format="txt" label="${tool.name} k-mer abundance histogram. The                         columns are: (1) k-mer abundance, (2) k-mer count, (3)                         cumulative count, (4) fraction of total distinct                         k-mers." name="output_histogram_filename">\n                 </data>\n-    </outputs>\n+\t</outputs>\n     <tests>\n-        <test>\n-            <param name="input_sequence_filename" value="test-abund-read-2.fa" />\n-            <param name="type" value="specific" />\n-            <param name="tablesize_specific" value="1e7" />\n-            <param name="n_tables" value="2" />\n-            <param name="ksize" value="17" />\n-            <param name="no_zero" value="false" />\n-            <output name="output_histogram_filename">\n-                <assert_contents>\n-                    <has_text text="1,96,96,0.98" />\n-                    <has_text text="1001,2,98,1.0" />\n-                </assert_contents>\n-            </output>\n-        </test>\n-        <test>\n-            <param name="input_sequence_filename" value="test-abund-read-2.fa" />\n-            <param name="type" value="specific" />\n-            <param name="tablesize_specific" value="1e7" />\n-            <param name="n_tables" value="2" />\n-            <param name="ksize" value="17" />\n-            <param name="no_zero" value="false" />\n-            <param name="bigcount" value="false" />\n-            <output name="output_histogram_filename">\n-                <assert_contents>\n-                    <has_text text="1,96,96,0.98" />\n-                    <has_text text="255,2,98,1.0" />\n-                </assert_contents>\n-            </output>\n-        </test>\n+\t    <test>\n+\t\t    <param name="input_sequence_filename" value="test-abund-read-2.fa" />\n+\t\t    <param name="type" value="specific" />\n+\t\t    <param name="tablesize_specific" value="1e7" />\n+\t\t    <param name="n_tables" value="2" />\n+\t\t    <param name="ksize" value="17" />\n+\t\t    <param name="no_zero" value="false" />\n+\t\t    <output name="output_histogram_filename">\n+\t\t\t    <assert_contents>\n+\t\t\t\t    <has_text text="1,96,96,0.98" />\n+\t\t\t\t    <has_text text="1001,2,98,1.0" />\n+\t\t\t    </assert_contents>\n+\t\t    </output>\n+\t    </test>\n+\t    <test>\n+\t\t    <param name="input_sequence_filename" value="test-abund-read-2.fa" />\n+\t\t    <param name="type" value="specific" />\n+\t\t    <param name="tablesize_specific" value="1e7" />\n+\t\t    <param name="n_tables" value="2" />\n+\t\t    <param name="ksize" value="17" />\n+\t\t    <param name="no_zero" value="false" />\n+\t\t    <param name="bigcount" value="false" />\n+\t\t    <output name="output_histogram_filename">\n+\t\t\t    <assert_contents>\n+\t\t\t\t    <has_text text="1,96,96,0.98" />\n+\t\t\t\t    <has_text text="255,2,98,1.0" />\n+\t\t\t    </assert_contents>\n+\t\t    </output>\n+\t    </test>\n+\n     </tests>\n     <help>\n Calculate the abundance distribution of k-mers from a single sequence file.\n@@ -265,7 +267,7 @@\n \n     </help>\n     <citations>\n-        <citation type="bibtex">@article{khmer2015,\n+\t<citation type="bibtex">@article{khmer2015,\n      author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine\n   and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n   Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n@@ -290,6 +292,6 @@\n      publisher = "F1000",\n      url = "http://dx.doi.org/10.12688/f1000research.6924.1"\n   }</citation>\n-        <citation type="doi">10.1371/journal.pone.0101271</citation>\n-    </citations>\n+\t<citation type="doi">10.1371/journal.pone.0101271</citation>\n+\t</citations>\n </tool>\n'
b
diff -r f458fd505ec6 -r 48d7f2580fe5 filter-abund.norm.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-abund.norm.xml Fri Sep 11 14:43:53 2015 -0400
[
b'@@ -0,0 +1,241 @@\n+<tool id="gedlab-khmer-filter-abund" name="Filter by abundance" version="2.0-3">\n+\n+\t<description>\n+\t\tTrims fastq/fasta sequences at k-mers of a given abundance\n+\t\tbased on a provided k-mer counting table.\n+\t</description>\n+\t<macros>\n+\t\t<macro name="requirements" type="xml">\n+\t\t<requirements>\n+\t\t\t<requirement type="package" version="2.0">khmer</requirement>\n+\t\t</requirements>\n+\t</macro>\n+\t<macro name="version" type="xml">\n+\t\t<version_command>filter-abund.py --version</version_command>\n+\t</macro>\n+\t<macro name="tableinputs" type="xml">\n+\t\t<conditional name="parameters">\n+\t\t\t<param help="ksize, n_tables, a specific tablesize" label="Advanced Parameters" name="type" type="select">\n+\t\t\t\t<option selected="true" value="simple">\n+\t\t\t\t\tHide\n+\t\t\t\t</option>\n+\t\t\t\t<option value="specific">\n+\t\t\t\t\tShow\n+\t\t\t\t</option>\n+            \t\t</param>\n+\t\t\t<when value="simple">\n+\t\t\t\t<param display="radio" label="Sample Type" name="tablesize" type="select">\n+\t\t\t\t\t<option selected="true" value="1e9">\n+\t\t\t\t\t\tMicrobial Genome\n+\t\t\t\t\t</option>\n+\t\t\t\t\t<option value="2e9">\n+\t\t\t\t\t\tAnimal Transcriptome\n+\t\t\t\t\t</option>\n+\t\t\t\t\t<option value="4e9">\n+\t\t\t\t\t\tSmall Animal Genome or\n+\t\t\t\t\t\tLow-Diversity Metagenome\n+\t\t\t\t\t</option>\n+\t\t\t\t\t<option value="16e9">\n+\t\t\t\t\t\tLarge Animal Genome\n+\t\t\t\t\t</option>\n+\t\t\t\t</param>\n+\t\t\t</when>\n+\t\t\t<when value="specific">\n+\t\t\t\t<param help="k-mer size to use" label="ksize" name="ksize" type="integer" value="20" />\n+\t\t\t\t<param help="number of tables to use" label="n_tables" min="1" name="n_tables" type="integer" value="4" />\n+\t\t\t\t<param help="lower bound on the tablesize to use" label="tablesize" name="tablesize_specific" type="text" />\n+\t\t\t</when>\n+\t\t</conditional>\n+\t</macro>\n+\t<macro name="input_sequences_filenames" type="xml">\n+                <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" help="Put in order of precedence such as longest reads first." label="FAST[AQ] file(s)" multiple="true" name="inputs" type="data" />\n+\t</macro>\n+\t<macro name="input_sequence_filename" type="xml">\n+                <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" label="FAST[AQ] file(s)" name="input_sequence_filename" type="data" />\n+\t</macro>\n+\t<macro name="input_counting_table_filename" type="xml">\n+                <param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n+\t</macro>\n+\t<macro name="abundance-histogram-output" type="xml">\n+                <data format="txt" label="${tool.name} k-mer abundance histogram. The                         columns are: (1) k-mer abundance, (2) k-mer count, (3)                         cumulative count, (4) fraction of total distinct                         k-mers." name="output_histogram_filename">\n+                </data>\n+\t</macro>\n+\t<macro name="output_sequences" type="xml">\n+                <data format_source="inputs" label="${tool.name} processed nucleotide sequence file" name="output">\n+                        <discover_datasets directory="output" pattern="__name__" visible="true" />\n+                </data>\n+\t</macro>\n+\t<macro name="output_sequences_single" type="xml">\n+                <data format_source="input_sequence_filename" label="${tool.name} processed nucleotide sequence file" name="output" />\n+\t</macro>\n+\t<macro name="input_zero" type="xml">\n+                <param checked="true" falsevalue="--no-zero" help="Output zero count bins (--no-zero)" name="zero" truevalue="" type="boolean" />\n+\t</macro>\n+\t<macro name="software-citation" type="xml">\n+\t\t<citation type="bibtex">@article{khmer2015,\n+     author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine\n+  and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n+  Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n+  Jacob and Fenzl, Thomas '..b'sequence has high enough coverage; median abundance &gt; 20 (--variable_coverage)" label="Variable coverage" name="variable_coverage" truevalue="--variable-coverage" type="boolean" />\n+\t\t<param help="Trim at k-mers below this abundance. (--cutoff)" label="cutoff" name="cutoff" type="integer" value="2" />\n+\t\t<param format="ct" help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." label="the k-mer counting table to query" name="input_counting_table_filename" type="data" />\n+\t</inputs>\n+\t<outputs>\n+\t\t<data format_source="inputs" label="${tool.name} processed nucleotide sequence file" name="output">\n+                        <discover_datasets directory="output" pattern="__name__" visible="true" />\n+                </data>\n+\t</outputs>\n+\t<tests>\n+                <test>\n+                        <param name="inputs" value="test-abund-read-2.fa" />\n+\t\t\t<param ftype="ct" name="input_counting_table_filename" value="test-abund-read-2.ct" />\n+                        <output name="output">\n+\t\t\t\t<discover_dataset name="test-abund-read-2.fa.abundfilt"> \n+                                \t<assert_contents>\n+                                        \t<has_text text="GGTTGACGGGGCTCAGGG" />\n+                                \t</assert_contents>\n+\t\t\t\t</discover_dataset>\n+                        </output>\n+                </test>\n+                <test>\n+\t\t\t<param name="inputs" value="test-abund-read-2.fa" />\n+\t\t\t<param ftype="ct" name="input_counting_table_filename" value="test-abund-read-2.ct" />\n+\t\t\t<param name="cutoff" value="1" />\n+                        <output name="output">\n+\t\t\t\t<discover_dataset name="test-abund-read-2.fa.abundfilt">\n+                                \t<assert_contents>\n+                                        \t<has_text text="GGTTGACGGGGCTCAGGG" />\n+                                \t</assert_contents>\n+\t\t\t\t</discover_dataset>\n+                        </output>\n+                </test>\n+        </tests>\n+\t<help>\n+Trim sequences at a minimum k-mer abundance.\t\t\n+\t\t\n+Trimmed sequences will be placed in ${input_sequence_filename}.abundfilt\n+for each input sequence file. If the input sequences are from RNAseq or\n+metagenome sequencing then `--variable-coverage` should be used.\n+\n+\t</help>\n+\t<citations>\n+\t\t<citation type="bibtex">@article{khmer2015,\n+     author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine\n+  and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,\n+  Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,\n+  Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and\n+  Garland, Phillip and Gluck, Jonathan and Gonz&#225;lez, Iv&#225;n and Guermond, Sarah\n+  and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and\n+  Hyer, Alex and H&#228;rpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David\n+  and Lippi, Justin and Mansour, Tamer and McA\'Nulty, Pamela and McDonald, Eric\n+  and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. and Nanlohy,\n+  Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory,\n+  Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and\n+  Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and\n+  Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan,\n+  Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and\n+  Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather\n+  L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and\n+  Brown, C. Titus"\n+     title = "The khmer software package: enabling efficient nucleotide\n+  sequence analysis",\n+     year = "2015",\n+     month = "08",\n+     publisher = "F1000",\n+     url = "http://dx.doi.org/10.12688/f1000research.6924.1"\n+  }</citation>\n+\t<citation type="doi">10.1371/journal.pone.0101271</citation>\n+\t</citations>\n+</tool>\n'
b
diff -r f458fd505ec6 -r 48d7f2580fe5 filter-abund.xml
--- a/filter-abund.xml Fri Sep 11 14:14:29 2015 -0400
+++ b/filter-abund.xml Fri Sep 11 14:43:53 2015 -0400
[
@@ -1,6 +1,6 @@
 <tool id="gedlab-khmer-filter-abund"
  name="Filter by abundance"
- version="2.0-3">
+ version="2.0-4">
 
  <description>
  Trims fastq/fasta sequences at k-mers of a given abundance
@@ -77,9 +77,8 @@
  <help><![CDATA[
 Trim sequences at a minimum k-mer abundance.
 
-Trimmed sequences will be placed in ${input_sequence_filename}.abundfilt
-for each input sequence file. If the input sequences are from RNAseq or
-metagenome sequencing then `--variable-coverage` should be used.
+If the input sequences are from RNAseq or metagenome sequencing then
+`--variable-coverage` should be used.
 ]]>
  </help>
  <citations>