Mercurial > repos > iuc > khmer_normalize_by_median
view normalize-by-median.xml @ 6:bfd859f04a89 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
author | iuc |
---|---|
date | Fri, 07 Sep 2018 11:01:41 -0400 |
parents | 73314e26dcfd |
children | 557cc16931f4 |
line wrap: on
line source
<!--
    Galaxy wrapper for khmer's normalize-by-median.py (digital normalization).

    Flow of the job, as implemented in the <command> section below:
      1. every selected input dataset is symlinked into the job directory as
         sequence-0, sequence-1, ... so the script sees predictable file names;
      2. the script is run from a fresh "output" sub-directory against
         ../sequence-*;
      3. whatever the script leaves in "output" is collected into the
         "sequences" list collection (the tests expect elements named
         sequence-N.keep).

    Requirements, stdio handling, the version command, the input selector,
    the table-size parameters (@TABLEPARAMS@), the help footer and the
    citations are all provided by macros.xml.
-->
<tool id="khmer_normalize_by_median" name="Normalize By Median" version="@WRAPPER_VERSION@.0">
    <description>Filter reads using digital normalization via k-mer abundances</description>
    <macros>
        <token name="@BINARY@">normalize-by-median.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <command><![CDATA[
set -xu &&
## Expose each input under a stable name; the glob at the end of the command
## relies on the sequence-N naming.
#for $num, $input in enumerate($inputs)
    ln -s '${input}' sequence-${num} &&
#end for
## Run inside a dedicated directory so that only files produced by the script
## are picked up by the output collection discovery.
mkdir output &&
cd output &&
normalize-by-median.py
    ${paired_switch}
    ${force_single_switch}
    @TABLEPARAMS@
    --cutoff=${cutoff}
    #if $unpaired_reads_filename
        --unpaired-reads='${unpaired_reads_filename}'
    #end if
    #if $save_countgraph
        --savegraph='${countgraph}'
    #end if
    #if $countgraph_to_load
        --loadgraph='${countgraph_to_load}'
    #end if
    --report='${report}'
    ../sequence-*
    ]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames" />
        <param name="paired_switch"
               type="boolean"
               checked="false"
               truevalue="--paired"
               falsevalue=""
               label="Require all sequences be properly paired?"
               help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
        <!-- The help prefix names the flag this switch actually emits. -->
        <param name="force_single_switch"
               type="boolean"
               checked="false"
               truevalue="--force_single"
               falsevalue=""
               label="Ignore all pairing information?"
               help="(--force_single) By default this tool process reads in a pair-aware manner. This option disables that behavior." />
        <param name="unpaired_reads_filename"
               type="data"
               format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
               optional="true"
               label="Extra unpaired reads"
               help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
        <param name="countgraph_to_load"
               type="data"
               format="oxlicg"
               optional="true"
               label="Optional k-mer countgraph"
               help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
        <param name="save_countgraph"
               type="boolean"
               label="Save the k-mer countgraph(s) in a file"
               help="(--savegraph)" />
        <param name="cutoff"
               type="integer"
               min="1"
               value="20"
               label="Cutoff"
               help="(--cutoff)" />
        <expand macro="tableinputs" />
    </inputs>
    <outputs>
        <!-- Only created when the user asked for the countgraph to be saved. -->
        <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph">
            <filter>save_countgraph == True</filter>
        </data>
        <data name="report" format="txt" label="${tool.name} report" />
        <!-- One element per file found in the "output" working directory. -->
        <collection name="sequences" type="list">
            <discover_datasets pattern="__name__" directory="output" />
        </collection>
    </outputs>
    <tests>
        <!-- Single input, cutoff 1. -->
        <test>
            <param name="inputs" value="test-abund-read-2.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Same input, cutoff 2. -->
        <test>
            <param name="inputs" value="test-abund-read-2.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="2" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.c2.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!--
            Paired input with the "require proper pairing" switch enabled.
            The switch is addressed by its real parameter name
            (paired_switch); a param called "paired" does not exist in
            <inputs>, so the flag would never have been exercised.
            NOTE(review): assumes the fixture is properly paired and that the
            expected report is unchanged by the flag; confirm with planemo test.
        -->
        <test>
            <param name="inputs" value="test-abund-read-paired.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <param name="paired_switch" value="true" />
            <output name="report" file="normalize-by-median.paired.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Do digital normalization (remove mostly redundant sequences)

Discard sequences based on whether or not their median k-mer abundance lies
above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

By default, Paired end reads will be considered together; if either read will
be kept, then both will be kept. (This keeps both reads from a fragment, and
helps with retention of repeats.) Unpaired reads are treated individually.

If `--paired` is set then proper pairing is required and the tool will exit on
unpaired reads, although `--unpaired-reads` can be used to supply a file of
orphan reads to be read after the paired reads.

`--force_single` will ignore all pairing information and treat reads
individually.

With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
file after all sequences have been processed. `--loadgraph` will load the
specified k-mer countgraph before processing the specified files. Note that
the countgraph is in same format as those produced by `load-into-counting.py`
and consumed by `abundance-dist.py`.

@HELP_FOOTER@
    ]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="diginorm-citation" />
    </citations>
</tool>