comparison rasusa.xml @ 0:49793e8a86f7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rasusa commit 547fd33a419af07f6f90a2daa2c00fa82b1d3ae5
author iuc
date Wed, 21 Feb 2024 11:17:57 +0000
parents
children 173642bff2be
comparison
equal deleted inserted replaced
-1:000000000000 0:49793e8a86f7
<tool id="rasusa" name="rasusa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
    <description>Randomly subsample reads to a specified coverage</description>
    <macros>
        <token name="@TOOL_VERSION@">0.8.0</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <!-- Datatypes accepted by every read input of this tool. -->
        <token name="@FORMATS@">fastqsanger,fastqsanger.gz,fasta,fasta.gz</token>
        <!-- Unit suffixes appended to the genome size / number of bases on the command line. -->
        <xml name="size_units">
            <option value="b">bases</option>
            <option value="k">Kilo bases</option>
            <option value="m">Mega bases</option>
            <option value="g">Giga bases</option>
            <option value="t">Tera bases</option>
        </xml>
    </macros>
    <xrefs>
        <xref type="bio.tools">rasusa</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">rasusa</requirement>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
        ## rasusa writes to working-directory files named after the input
        ## extension; they are moved onto the Galaxy outputs afterwards.
        rasusa
        #if str( $input.input_selector ) == "paired":
            #set r1_ext = $input.reads1.extension
            #set r2_ext = $input.reads2.extension
            -i '${input.reads1}'
            -i '${input.reads2}'
            -o 'paired_out1.$r1_ext'
            -o 'paired_out2.$r2_ext'
        #elif str( $input.input_selector ) == "paired_collection":
            #set r1_ext = $input.collection.forward.extension
            #set r2_ext = $input.collection.reverse.extension
            -i '${input.collection.forward}'
            -i '${input.collection.reverse}'
            -o 'paired_out1.$r1_ext'
            -o 'paired_out2.$r2_ext'
        #else:
            #set r1_ext = $input.reads.extension
            ## No second mate in single-end mode: define r2_ext anyway so the
            ## compression check below never evaluates an undefined name.
            #set r2_ext = ''
            -i '${input.reads}'
            -o 'single_out.$r1_ext'
        #end if
        #if str( $subsample.type ) == "coverage":
            --genome-size '$subsample.genome_size$subsample.genome_size_unit'
            --coverage $subsample.coverage
        #elif str( $subsample.type ) == "num_bases":
            --bases '$subsample.bases$subsample.num_bases_unit'
        #elif str( $subsample.type ) == "num_reads":
            --num $subsample.num
        #elif str( $subsample.type ) == "frac_reads":
            --frac $subsample.frac
        #end if
        ## The seed is optional: only pass -s when a value was supplied,
        ## otherwise a bare -s would swallow the next token or abort the run.
        #if str( $seed ) != "":
            -s $seed
        #end if
        ## Request gzip output whenever any input is gzip-compressed.
        #if $r1_ext.endswith(".gz") or $r2_ext.endswith(".gz"):
            --output-type g
        #end if
        &&

        #if str( $input.input_selector ) == "paired":
            mv 'paired_out1.$r1_ext' '$paired_output1' &&
            mv 'paired_out2.$r2_ext' '$paired_output2'
        #elif str( $input.input_selector ) == "paired_collection":
            mv 'paired_out1.$r1_ext' '${collection_output.forward}' &&
            mv 'paired_out2.$r2_ext' '${collection_output.reverse}'
        #else:
            mv 'single_out.$r1_ext' '$single_output'
        #end if
    ]]></command>
    <inputs>
        <!-- Layout of the reads: two datasets, one dataset, or a paired collection. -->
        <conditional name="input">
            <param name="input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
                <option value="paired_collection">Paired Collection</option>
            </param>
            <when value="paired">
                <param name="reads1" type="data" format="@FORMATS@" label="Select first set of reads" help="Specify dataset with forward reads"/>
                <param name="reads2" type="data" format="@FORMATS@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
            </when>
            <when value="single">
                <param name="reads" type="data" format="@FORMATS@" label="Select fasta/fastq dataset" help="Specify dataset with single reads"/>
            </when>
            <when value="paired_collection">
                <param name="collection" format="@FORMATS@" type="data_collection" collection_type="paired" label="Select a paired collection"/>
            </when>
        </conditional>
        <!-- How the size of the subsample is specified; exactly one strategy is passed to rasusa. -->
        <conditional name="subsample">
            <param name="type" type="select" label="Subsample reads based on">
                <option value="coverage">Coverage</option>
                <option value="num_bases">Number of bases</option>
                <option value="num_reads">Number of reads</option>
                <option value="frac_reads">Fraction of reads</option>
            </param>
            <when value="coverage">
                <param name="genome_size_unit" type="select" label="Specify genome size in">
                    <expand macro="size_units"/>
                </param>
                <param name="genome_size" type="float" min="0" label="Genome size to calculate coverage with respect to"/>
                <param argument="--coverage" type="float" min="0" label="The desired coverage to sub-sample the reads to"/>
            </when>
            <when value="num_bases">
                <param name="num_bases_unit" type="select" label="Specify number of bases in">
                    <expand macro="size_units"/>
                </param>
                <param name="bases" type="float" min="0" label="Explicitly set the number of bases required"/>
            </when>
            <when value="num_reads">
                <param argument="--num" type="integer" value="" min="1" label="Number of reads to subsample to"/>
            </when>
            <when value="frac_reads">
                <param argument="--frac" type="float" value="" min="0" max="1" label="Fraction of reads to subsample to"/>
            </when>
        </conditional>
        <param type="integer" name="seed" optional="true" label="Random seed to use" help="Optional. When left empty, no seed is passed to rasusa."/>
    </inputs>
    <outputs>
        <!-- Exactly one of the three output shapes is produced, matching the selected input layout. -->
        <data name="paired_output1" label="${tool.name} on ${on_string}: paired-end r1" format_source="reads1">
            <filter>input['input_selector'] == "paired"</filter>
        </data>
        <data name="paired_output2" label="${tool.name} on ${on_string}: paired-end R2" format_source="reads2">
            <filter>input['input_selector'] == "paired"</filter>
        </data>
        <data name="single_output" label="${tool.name} on ${on_string}: single-end" format_source="reads">
            <filter>input['input_selector'] == 'single'</filter>
        </data>
        <collection name="collection_output" type="paired" label="${tool.name} on ${on_string}: paired-collection">
            <filter>input['input_selector'] == "paired_collection"</filter>
            <data name="forward" label="${tool.name} on ${input.collection.forward.name}: paired-end r1" format_source="collection['forward']"/>
            <data name="reverse" label="${tool.name} on ${input.collection.reverse.name}: paired-end R2" format_source="collection['reverse']"/>
        </collection>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- test 1: single-end fastq by coverage in bases -->
            <conditional name="input">
                <param name="input_selector" value="single"/>
                <param name="reads" value="r1.fastq.gz"/>
            </conditional>
            <conditional name="subsample">
                <param name="type" value="coverage"/>
                <param name="genome_size_unit" value="b"/>
                <param name="genome_size" value="1000"/>
                <param name="coverage" value="1"/>
            </conditional>
            <param name="seed" value="1"/>
            <output name="single_output" value="single_by_coverage_b.fastq.gz" ftype="fastqsanger.gz"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test 2: paired-end fastq by coverage in kb -->
            <conditional name="input">
                <param name="input_selector" value="paired"/>
                <param name="reads1" value="r1.fastq.gz"/>
                <param name="reads2" value="r2.fastq.gz"/>
            </conditional>
            <conditional name="subsample">
                <param name="type" value="coverage"/>
                <param name="genome_size_unit" value="k"/>
                <param name="genome_size" value="1"/>
                <param name="coverage" value="1"/>
            </conditional>
            <param name="seed" value="1"/>
            <output name="paired_output1" value="paired1_by_coverage_k.fastq.gz" ftype="fastqsanger.gz"/>
            <output name="paired_output2" value="paired2_by_coverage_k.fastq.gz" ftype="fastqsanger.gz"/>
        </test>
        <test expect_num_outputs="3">
            <!-- test 3: paired-collection fastq by coverage in mb -->
            <conditional name="input">
                <param name="input_selector" value="paired_collection"/>
                <param name="collection">
                    <collection type="paired">
                        <element name="forward" value="r1.fastq.gz"/>
                        <element name="reverse" value="r2.fastq.gz"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="subsample">
                <param name="type" value="coverage"/>
                <param name="genome_size_unit" value="m"/>
                <param name="genome_size" value="0.001"/>
                <param name="coverage" value="1"/>
            </conditional>
            <param name="seed" value="1"/>
            <output_collection name="collection_output" type="paired">
                <element name="forward" file="paired1_by_coverage_m.fastq.gz" ftype="fastqsanger.gz"/>
                <element name="reverse" file="paired2_by_coverage_m.fastq.gz" ftype="fastqsanger.gz"/>
            </output_collection>
        </test>
        <test expect_num_outputs="1">
            <!-- test 4: single-end fasta by coverage in gb -->
            <conditional name="input">
                <param name="input_selector" value="single"/>
                <param name="reads" value="r1.fasta.gz"/>
            </conditional>
            <conditional name="subsample">
                <param name="type" value="coverage"/>
                <param name="genome_size_unit" value="g"/>
                <param name="genome_size" value="0.001"/>
                <param name="coverage" value="0.001"/>
            </conditional>
            <param name="seed" value="1"/>
            <output name="single_output" value="single_end_by_coverage_g.fasta" ftype="fasta.gz"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test 5: paired-end fastq by number of bases -->
            <conditional name="input">
                <param name="input_selector" value="paired"/>
                <param name="reads1" value="r1.fastq"/>
                <param name="reads2" value="r2.fastq"/>
            </conditional>
            <conditional name="subsample">
                <param name="type" value="num_bases"/>
                <param name="num_bases_unit" value="k"/>
                <param name="bases" value="2"/>
            </conditional>
            <param name="seed" value="1"/>
            <output name="paired_output1" value="paired1_by_num_bases_k.fastq" ftype="fastqsanger"/>
            <output name="paired_output2" value="paired2_by_num_bases_k.fastq" ftype="fastqsanger"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test 6: paired-end fasta by number of reads -->
            <conditional name="input">
                <param name="input_selector" value="paired"/>
                <param name="reads1" value="r1.fasta.gz"/>
                <param name="reads2" value="r2.fasta.gz"/>
            </conditional>
            <conditional name="subsample">
                <param name="type" value="num_reads"/>
                <param name="num" value="5"/>
            </conditional>
            <param name="seed" value="1"/>
            <output name="paired_output1" value="paired1_by_num_reads.fasta.gz" ftype="fasta.gz"/>
            <output name="paired_output2" value="paired2_by_num_reads.fasta.gz" ftype="fasta.gz"/>
        </test>
        <test expect_num_outputs="3">
            <!-- test 7: paired-collection fasta by fraction reads -->
            <conditional name="input">
                <param name="input_selector" value="paired_collection"/>
                <param name="collection">
                    <collection type="paired">
                        <element name="forward" value="r1.fasta"/>
                        <element name="reverse" value="r2.fasta"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="subsample">
                <param name="type" value="frac_reads"/>
                <param name="frac" value="0.6"/>
            </conditional>
            <param name="seed" value="1"/>
            <output_collection name="collection_output" type="paired">
                <element name="forward" file="paired1_by_frac_reads.fasta" ftype="fasta"/>
                <element name="reverse" file="paired2_by_frac_reads.fasta" ftype="fasta"/>
            </output_collection>
        </test>
        <test expect_num_outputs="1">
            <!-- test 8: single-end uncompressed fastq by number of reads, no seed.
                 Regression test: the command must render without a second-mate
                 extension and without a seed value. The sampled reads are random,
                 so only the shape of the output is checked. -->
            <conditional name="input">
                <param name="input_selector" value="single"/>
                <param name="reads" value="r1.fastq"/>
            </conditional>
            <conditional name="subsample">
                <param name="type" value="num_reads"/>
                <param name="num" value="5"/>
            </conditional>
            <assert_command>
                <not_has_text text="--output-type"/>
            </assert_command>
            <output name="single_output" ftype="fastqsanger">
                <assert_contents>
                    <has_text text="@"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

Randomly subsample reads to a specified coverage. Rasusa provides a random subsample of a read file (FASTA or FASTQ), with
several ways of specifying the size of the subset:

* a genome size and the desired coverage
* a target number of bases (nucleotides)
* a target number of reads
* a fraction of the reads to be sampled

The random seed is optional; set it to make the subsample reproducible.
    ]]></help>
    <citations>
        <citation type="doi">10.21105/joss.03941</citation>
    </citations>
</tool>