diff normalize-by-median.xml @ 0:5531deeabd2c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
author iuc
date Wed, 11 Nov 2015 09:46:18 -0500
parents
children 73314e26dcfd
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/normalize-by-median.xml	Wed Nov 11 09:46:18 2015 -0500
@@ -0,0 +1,139 @@
+<tool id="khmer_normalize_by_median" name="Normalize By Median" version="@WRAPPER_VERSION@.0">
+    <description>Filters a fastq/fasta file using digital normalization via median k-mer abundances</description>
+    <macros> <!-- @BINARY@ is defined here; the other @...@ tokens and the expanded macros are not defined in this file (presumably they come from macros.xml; confirm there). -->
+        <token name="@BINARY@">normalize-by-median.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <!-- Symlinks each input as sequence-N, then runs the tool from inside output/ (the directory the outputs section scans). Dataset paths are single-quoted for the shell. -->
+    <command><![CDATA[
+set -xu &&
+#for $num, $input in enumerate($inputs)
+    ln -s '${input}' sequence-${num} &&
+#end for
+mkdir output &&
+cd output &&
+normalize-by-median.py
+${paired_switch}
+${force_single_switch}
+@TABLEPARAMS@
+--cutoff=${cutoff}
+#if $unpaired_reads_filename
+    --unpaired-reads='${unpaired_reads_filename}'
+#end if
+#if $save_countgraph
+    --savegraph='${countgraph}'
+#end if
+#if $countgraph_to_load
+    --loadgraph='${countgraph_to_load}'
+#end if
+--report='${report}'
+../sequence-*
+]]></command>
+    <inputs> <!-- Each help text starts with the command-line flag that the parameter feeds in the command template. -->
+        <expand macro="input_sequences_filenames" />
+        <param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
+            label="Require all sequences be properly paired?"
+            help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
+        <param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
+            label="Ignore all pairing information?"
+            help="(--force_single) By default this tool processes reads in a pair-aware manner. This option disables that behavior." />
+        <param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
+            label="Extra unpaired reads"
+            help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
+        <param name="countgraph_to_load" type="data" format="oxlicg" optional="true"
+            label="Optional k-mer countgraph"
+            help="(--loadgraph) The input file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
+        <param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" />
+        <param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" />
+        <expand macro="tableinputs" />
+    </inputs>
+    <outputs> <!-- countgraph is only produced when save_countgraph is set; the sequences collection is discovered from files in output/. -->
+        <data format="oxlicg" name="countgraph" label="${tool.name} k-mer countgraph">
+            <filter>save_countgraph == True</filter>
+        </data>
+        <data format="txt" name="report" label="${tool.name} report"></data>
+        <collection type="list" name="sequences">
+            <discover_datasets directory="output" pattern="__name__"></discover_datasets>
+        </collection>
+    </outputs>
+    <tests> <!-- Test params must use the names declared in the inputs section; the paired test sets paired_switch so that the paired flag is actually passed. -->
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="type" value="specific" />
+            <param name="cutoff" value="1" />
+            <param name="ksize" value="17" />
+            <output name="report" file="normalize-by-median.report.txt" />
+            <output_collection name="sequences" type="list">
+                <element name="sequence-0.keep">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="type" value="specific" />
+            <param name="cutoff" value="2" />
+            <param name="ksize" value="17" />
+            <output name="report" file="normalize-by-median.c2.report.txt" />
+            <output_collection name="sequences" type="list">
+                <element name="sequence-0.keep">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
+                        <has_text text="GGTTGACGGGGCTCAGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="inputs" value="test-abund-read-paired.fa" />
+            <param name="type" value="specific" />
+            <param name="cutoff" value="1" />
+            <param name="ksize" value="17" />
+            <param name="paired_switch" value="true" />
+            <output name="report" file="normalize-by-median.paired.report.txt" />
+            <output_collection name="sequences" type="list">
+                <element name="sequence-0.keep">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
+                        <has_text text="GGTTGACGGGGCTCAGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Do digital normalization (remove mostly redundant sequences)
+
+Discard sequences based on whether or not their median k-mer abundance lies
+above a specified cutoff. Kept sequences will be placed in <fileN>.keep.
+
+By default, paired-end reads will be considered together; if either read will
+be kept, then both will be kept. (This keeps both reads from a fragment, and
+helps with retention of repeats.) Unpaired reads are treated individually.
+
+If `--paired` is set then proper pairing is required and the tool will exit on
+unpaired reads, although `--unpaired-reads` can be used to supply a file of
+orphan reads to be read after the paired reads.
+
+`--force_single` will ignore all pairing information and treat reads
+individually.
+
+With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
+file after all sequences have been processed. `--loadgraph` will load the
+specified k-mer countgraph before processing the specified files.  Note
+that the countgraph is in the same format as those produced by
+`load-into-counting.py` and consumed by `abundance-dist.py`.
+
+@HELP_FOOTER@
+]]>    
+    </help>
+    <citations> <!-- Both citation macros are not defined in this file (presumably they come from the imported macros.xml; confirm there). -->
+        <expand macro="software-citation" />
+        <expand macro="diginorm-citation" />
+    </citations>
+</tool>