comparison normalize-by-median.xml @ 0:5531deeabd2c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
author iuc
date Wed, 11 Nov 2015 09:46:18 -0500
parents
children 73314e26dcfd
comparison
equal deleted inserted replaced
-1:000000000000 0:5531deeabd2c
1 <tool id="khmer_normalize_by_median" name="Normalize By Median" version="@WRAPPER_VERSION@.0">
2 <description>Filters a fastq/fasta file using digital normalization via median k-mer abundances</description>
3 <macros> <!-- Defines the @BINARY@ token for this tool and imports shared macros from macros.xml; the requirements, stdio and version macros expanded below are presumably defined in that file (confirm against macros.xml). -->
4 <token name="@BINARY@">normalize-by-median.py</token>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
8 <expand macro="stdio" />
9 <expand macro="version" />
10 <!-- Symlinks every selected input as sequence-N in the job directory, then runs normalize-by-median.py from inside output/ against ../sequence-*; the outputs collection is discovered from output/. Galaxy-supplied paths are single-quoted so the shell never word-splits or globs them. --> <command><![CDATA[
11 set -xu &&
12 #for $num, $input in enumerate($inputs)
13 ln -s '${input}' sequence-${num} &&
14 #end for
15 mkdir output &&
16 cd output &&
17 normalize-by-median.py
18 ${paired_switch}
19 ${force_single_switch}
20 @TABLEPARAMS@
21 --cutoff=${cutoff}
22 #if $unpaired_reads_filename
23 --unpaired-reads='${unpaired_reads_filename}'
24 #end if
25 #if $save_countgraph
26 --savegraph='${countgraph}'
27 #end if
28 #if $countgraph_to_load
29 --loadgraph='${countgraph_to_load}'
30 #end if
31 --report='${report}'
32 ../sequence-*
33 ]]>
34 </command>
35 <inputs> <!-- Each parameter names the normalize-by-median.py option it controls at the start of its help text; the sequence inputs and table-size parameters come from the input_sequences_filenames and tableinputs macros (presumably defined in macros.xml). -->
36 <expand macro="input_sequences_filenames" />
37 <param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
38 label="Require all sequences be properly paired?"
39 help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
40 <param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
41 label="Ignore all pairing information?"
42 help="(--force_single) By default this tool processes reads in a pair-aware manner. This option disables that behavior." />
43 <param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
44 label="Extra unpaired reads"
45 help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
46 <param name="countgraph_to_load" type="data" format="oxlicg" optional="true"
47 label="Optional k-mer countgraph"
48 help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
49 <param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" />
50 <param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" />
51 <expand macro="tableinputs" />
52 </inputs>
53 <outputs> <!-- countgraph is only produced when save_countgraph is checked (see its filter); report is always produced; the sequences list collection is populated from the files found in the output directory, with each element named after its file. -->
54 <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph">
55 <filter>save_countgraph == True</filter>
56 </data>
57 <data name="report" format="txt" label="${tool.name} report" />
58 <collection name="sequences" type="list">
59 <discover_datasets pattern="__name__" directory="output" />
60 </collection>
61 </outputs>
62 <tests>
63 <test> <!-- Single input with cutoff=1 and ksize=17: compares the report against normalize-by-median.report.txt and checks that sequence-0.keep contains the expected read. -->
64 <param name="inputs" value="test-abund-read-2.fa"/>
65 <param name="type" value="specific" />
66 <param name="cutoff" value="1" />
67 <param name="ksize" value="17" />
68 <output name="report" file="normalize-by-median.report.txt" />
69 <output_collection name="sequences" type="list">
70 <element name="sequence-0.keep">
71 <assert_contents>
72 <has_text text="GGTTGACGGGGCTCAGGGGG" />
73 </assert_contents>
74 </element>
75 </output_collection>
76 </test>
77 <test> <!-- Same input as the cutoff=1 test but with cutoff=2: compares the report against normalize-by-median.c2.report.txt and checks sequence-0.keep for both expected sequences. -->
78 <param name="inputs" value="test-abund-read-2.fa" />
79 <param name="type" value="specific" />
80 <param name="cutoff" value="2" />
81 <param name="ksize" value="17" />
82 <output name="report" file="normalize-by-median.c2.report.txt" />
83 <output_collection name="sequences" type="list">
84 <element name="sequence-0.keep">
85 <assert_contents>
86 <has_text text="GGTTGACGGGGCTCAGGGGG" />
87 <has_text text="GGTTGACGGGGCTCAGGG" />
88 </assert_contents>
89 </element>
90 </output_collection>
91 </test>
92 <test> <!-- Paired-input case with cutoff=1 and ksize=17. The boolean is addressed by its real name, paired_switch, so the paired option is actually enabled for this run; the report is compared against normalize-by-median.paired.report.txt. -->
93 <param name="inputs" value="test-abund-read-paired.fa" />
94 <param name="type" value="specific" />
95 <param name="cutoff" value="1" />
96 <param name="ksize" value="17" />
97 <param name="paired_switch" value="true" />
98 <output name="report" file="normalize-by-median.paired.report.txt" />
99 <output_collection name="sequences" type="list">
100 <element name="sequence-0.keep">
101 <assert_contents>
102 <has_text text="GGTTGACGGGGCTCAGGGGG" />
103 <has_text text="GGTTGACGGGGCTCAGGG" />
104 </assert_contents>
105 </element>
106 </output_collection>
107 </test>
108 </tests>
109 <help><![CDATA[
110 Do digital normalization (remove mostly redundant sequences)
111
112 Discard sequences based on whether or not their median k-mer abundance lies
113 above a specified cutoff. Kept sequences will be placed in <fileN>.keep.
114
115 By default, paired-end reads will be considered together; if either read will
116 be kept, then both will be kept. (This keeps both reads from a fragment, and
117 helps with retention of repeats.) Unpaired reads are treated individually.
118
119 If `--paired` is set then proper pairing is required and the tool will exit on
120 unpaired reads, although `--unpaired-reads` can be used to supply a file of
121 orphan reads to be read after the paired reads.
122
123 `--force_single` will ignore all pairing information and treat reads
124 individually.
125
126 With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
127 file after all sequences have been processed. `--loadgraph` will load the
128 specified k-mer countgraph before processing the specified files. Note
129 that the countgraph is in the same format as those produced by
130 `load-into-counting.py` and consumed by `abundance-dist.py`.
131
132 @HELP_FOOTER@
133 ]]>
134 </help>
135 <citations> <!-- Citation entries are pulled in through the software-citation and diginorm-citation macros, presumably defined in macros.xml (confirm there). -->
136 <expand macro="software-citation" />
137 <expand macro="diginorm-citation" />
138 </citations>
139 </tool>