diff count-median.xml @ 0:42fbb6359626 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
author iuc
date Wed, 11 Nov 2015 09:45:59 -0500
parents
children 84b323860b0a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/count-median.xml	Wed Nov 11 09:45:59 2015 -0500
@@ -0,0 +1,55 @@
+<tool id="khmer_count_median" name="Count Median" version="@WRAPPER_VERSION@.0">
+    <description>Count the median/avg k-mer abundance for each sequence in the input file</description>
+    <macros>
+        <token name="@BINARY@">count-median.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+@BINARY@
+$input_countgraph_filename
+$input_sequence_filename
+$output_summary_filename
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <expand macro="input_countgraph_filename" />
+    </inputs>
+    <outputs>
+        <data name="output_summary_filename" format="txt"
+            label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+            <output name="output_summary_filename">
+                <assert_contents>
+                    <has_text text="seq,1001,1001.0,0.0,18" />
+                    <has_text text="895:1:37:17593:9954/1,1,21.408163071,141.391921997,114" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Count the median/avg k-mer abundance for each sequence in the input file,
+based on the k-mer counts in the given k-mer countgraph. Can be used to
+estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The
+output file contains sequence id, median, average, stddev, and seq length;
+fields are separated by spaces. For khmer 1.x count-median.py will split
+sequence names at the first space which means that some sequence formats (e.g.
+paired FASTQ in Casava 1.8 format) will yield uninformative names. Use
+`--csv` to fix this behavior.
+
+@HELP_FOOTER@
+]]>
+    </help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="diginorm-citation" />
+    </citations>
+</tool>