Mercurial > repos > iuc > khmer_normalize_by_median
diff normalize-by-median.xml @ 0:5531deeabd2c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
author | iuc |
---|---|
date | Wed, 11 Nov 2015 09:46:18 -0500 |
parents | |
children | 73314e26dcfd |
line wrap: on
line diff
<!--
  Galaxy tool wrapper for khmer's normalize-by-median.py.
  Reconstructed as plain XML from the Mercurial diff view of
  normalize-by-median.xml (file newly added in revision 0:5531deeabd2c).
  Shared pieces (requirements, stdio, version, table parameters, input
  sequences, citations, help footer) are pulled in from macros.xml.
-->
<tool id="khmer_normalize_by_median" name="Normalize By Median" version="@WRAPPER_VERSION@.0">
    <description>Filters a fastq/fasta file using digital normalization via median k-mer abundances</description>
    <macros>
        <token name="@BINARY@">normalize-by-median.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <!--
      Command template (Cheetah). Dataset paths are single-quoted so that
      paths containing spaces or shell metacharacters are passed intact.
    -->
    <command><![CDATA[
set -xu &&
## Link every input dataset into the job directory as sequence-<index>;
## the files are later handed to the script through the ../sequence-* glob.
#for $num, $input in enumerate($inputs)
    ln -s '${input}' sequence-${num} &&
#end for
## Run inside output/ so the files written there can be collected by the
## "sequences" output collection (discover_datasets directory="output").
mkdir output &&
cd output &&
normalize-by-median.py
${paired_switch}
${force_single_switch}
@TABLEPARAMS@
--cutoff=${cutoff}
## Optional arguments are only emitted when the corresponding input is set.
#if $unpaired_reads_filename
    --unpaired-reads='${unpaired_reads_filename}'
#end if
#if $save_countgraph
    --savegraph='${countgraph}'
#end if
#if $countgraph_to_load
    --loadgraph='${countgraph_to_load}'
#end if
--report='${report}'
../sequence-*
]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames" />
        <!-- Boolean switches expand to the literal command line flag, or to nothing when unchecked. -->
        <param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
            label="Require all sequences be properly paired?"
            help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
        <param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
            label="Ignore all pairing information?"
            help="(--force_single) By default this tool process reads in a pair-aware manner. This option disables that behavior." />
        <param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
            label="Extra unpaired reads"
            help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
        <param name="countgraph_to_load" type="data" format="oxlicg" optional="true"
            label="Optional k-mer countgraph"
            help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
        <param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" />
        <param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" />
        <expand macro="tableinputs" />
    </inputs>
    <outputs>
        <!-- The countgraph dataset is only created when the user asked for it to be saved. -->
        <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph">
            <filter>save_countgraph == True</filter>
        </data>
        <data name="report" format="txt" label="${tool.name} report" />
        <!-- Every file found in output/ becomes one element of this list, named after the file. -->
        <collection name="sequences" type="list">
            <discover_datasets pattern="__name__" directory="output" />
        </collection>
    </outputs>
    <tests>
        <!-- Single input, cutoff 1. -->
        <test>
            <param name="inputs" value="test-abund-read-2.fa"/>
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Same input, cutoff 2. -->
        <test>
            <param name="inputs" value="test-abund-read-2.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="2" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.c2.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!--
          Paired input. The switch must be addressed by its input name
          "paired_switch"; a param called "paired" does not exist in this tool,
          so the paired flag would never reach the command line.
        -->
        <test>
            <param name="inputs" value="test-abund-read-paired.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <param name="paired_switch" value="true" />
            <output name="report" file="normalize-by-median.paired.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Do digital normalization (remove mostly redundant sequences)

Discard sequences based on whether or not their median k-mer abundance lies
above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

By default, Paired end reads will be considered together; if either read will
be kept, then both will be kept. (This keeps both reads from a fragment, and
helps with retention of repeats.) Unpaired reads are treated individually.

If `--paired` is set then proper pairing is required and the tool will exit on
unpaired reads, although `--unpaired-reads` can be used to supply a file of
orphan reads to be read after the paired reads.

`--force_single` will ignore all pairing information and treat reads
individually.

With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
file after all sequences have been processed. `--loadgraph` will load the
specified k-mer countgraph before processing the specified files. Note
that the countgraph is in same format as those produced by
`load-into-counting.py` and consumed by `abundance-dist.py`.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="diginorm-citation" />
    </citations>
</tool>