comparison epic2 0.41/epic2_wrapper.xml @ 8:5f3952470864 draft

Uploaded 0.0.41
author mpaya
date Mon, 03 Feb 2020 11:43:12 -0500
parents
children ea03253198b4
comparison
equal deleted inserted replaced
7:8b4cb7f52809 8:5f3952470864
<tool id="epic2" name="epic2" version="@VERSION@.0">
    <description>peak calling of broad ChIP-Seq marks</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />

    <!-- Exit codes 1-125 and 130+ are fatal; a "not found" message on stderr
         is reported as a broken epic2 installation. -->
    <stdio>
        <exit_code range="1:125" level="fatal" description="Unknown error occurred" />
        <exit_code range="130:" level="fatal" description="Unknown error occurred" />
        <regex match="epic2: (command ){0,1}not found" source="stderr" level="fatal" description="The epic2 python package is not properly installed, contact Galaxy administrators" />
    </stdio>

    <command><![CDATA[

        ## Set up treatment files: each dataset is symlinked under a name whose
        ## extension (bed, bam or sam) matches its Galaxy datatype, and the link
        ## names are collected in t_file_list. BAM inputs also get their index
        ## linked next to them as <name>.bai.
        #set $t_file_list = list()
        #if str($treatment.t_multi_select) == "No"
            #if $treatment.input_treatment_file.is_of_type('bed')
                #set $t_file = 'ChIP_file.bed'
                ln -s '$treatment.input_treatment_file' $t_file &&
            #elif $treatment.input_treatment_file.is_of_type('bam')
                #set $t_file = 'ChIP_file.bam'
                ln -s '$treatment.input_treatment_file' $t_file &&
                ln -s '$treatment.input_treatment_file.metadata.bam_index' ${t_file}.bai &&
            #elif $treatment.input_treatment_file.is_of_type('sam')
                #set $t_file = 'ChIP_file.sam'
                ln -s '$treatment.input_treatment_file' $t_file &&
            #end if
            ## silent: record the name without emitting anything into the command line
            #silent $t_file_list.append($t_file)
        #else
            #set $inputs = $treatment.input_treatment_file
            #for $i, $f in enumerate($inputs)
                #if $f.is_of_type('bed')
                    #set $t_file = ''.join(['ChIP_file_',str($i),'.bed'])
                    ln -s '$f' $t_file &&
                #elif $f.is_of_type('bam')
                    #set $t_file = ''.join(['ChIP_file_',str($i),'.bam'])
                    ln -s '$f' $t_file &&
                    ln -s '$f.metadata.bam_index' ${t_file}.bai &&
                #elif $f.is_of_type('sam')
                    #set $t_file = ''.join(['ChIP_file_',str($i),'.sam'])
                    ln -s '$f' $t_file &&
                #end if
                #silent $t_file_list.append($t_file)
            #end for
        #end if

        ## Set up control files the same way (only when a control was selected).
        #if str($control.c_select) == "Yes"
            #set $c_file_list = list()
            #if str($control.c_multiple.c_multi_select) == "No"
                #set $f = $control.c_multiple.input_control_file
                #if $f.is_of_type('bed')
                    #set $c_file = 'control_file.bed'
                    ln -s '$f' $c_file &&
                #elif $f.is_of_type('bam')
                    #set $c_file = 'control_file.bam'
                    ln -s '$f' $c_file &&
                    ln -s '$f.metadata.bam_index' ${c_file}.bai &&
                #elif $f.is_of_type('sam')
                    #set $c_file = 'control_file.sam'
                    ln -s '$f' $c_file &&
                #end if
                #silent $c_file_list.append($c_file)
            #else
                #set $inputs = $control.c_multiple.input_control_file
                #for $i, $f in enumerate($inputs)
                    #if $f.is_of_type('bed')
                        #set $c_file = ''.join(['control_file',str($i),'.bed'])
                        ln -s '$f' $c_file &&
                    #elif $f.is_of_type('bam')
                        #set $c_file = ''.join(['control_file',str($i),'.bam'])
                        ln -s '$f' $c_file &&
                        ln -s '$f.metadata.bam_index' ${c_file}.bai &&
                    #elif $f.is_of_type('sam')
                        #set $c_file = ''.join(['control_file',str($i),'.sam'])
                        ln -s '$f' $c_file &&
                    #end if
                    #silent $c_file_list.append($c_file)
                #end for
            #end if
        #end if

        epic2

        ## Treatment File(s)
        -t ${ ' '.join( $t_file_list ) }

        ## Control File(s), or the e-value when running without a control.
        ## The evalue parameter only exists in the "No" branch of the conditional.
        #if str($control.c_select) == "Yes"
            -c ${ ' '.join( $c_file_list ) }
        #elif str($control.evalue) != ''
            -e $control.evalue
        #end if

        ## Predefined or Custom Genome.
        ## For fasta input the chromosome sizes table (name TAB length) is
        ## computed on the fly by awk and handed over via process substitution.
        #if str($genome.g_select) == "Yes"
            --genome ${genome.builtin_genome}
        #else
            #if str($genome.chromsizes.chr_select) == "No"
                #if $genome.chromsizes.cs_file.is_of_type('fasta')
                    --chromsizes <(awk '/^>/ {if (seqlen) print seqlen;printf substr($1,2) "\t";seqlen=0;next}
                    {seqlen+=length($0)}END{print seqlen}' '${genome.chromsizes.cs_file}')
                #else
                    --chromsizes '${genome.chromsizes.cs_file}'
                #end if
            #else
                #if $genome.chromsizes.builtin_fasta.fields.path
                    --chromsizes <(awk '/^>/ {if (seqlen) print seqlen;printf substr($1,2) "\t";seqlen=0;next}
                    {seqlen+=length($0)}END{print seqlen}' '${genome.chromsizes.builtin_fasta.fields.path}')
                #end if
            #end if
        #end if

        ## Optional numeric parameters are compared against the empty string
        ## rather than tested for truthiness, so an explicit 0 is still passed on.
        #if str($genome.egf) != ''
            --effective-genome-fraction ${genome.egf}
        #end if

        #if str($fdr) != ''
            -fdr $fdr
        #end if

        ## BAM OPTIONS
        #if str($bam_options.required_flag) != ''
            --required-flag $bam_options.required_flag
        #end if

        #if str($bam_options.filter_flag) != ''
            --filter-flag $bam_options.filter_flag
        #end if

        #if str($bam_options.mapq) != ''
            --mapq $bam_options.mapq
        #end if

        #if $bam_options.autodetect_chroms
            --autodetect-chroms
        #end if

        ## Free-text value: quoted so the shell passes it as a single argument.
        #if $bam_options.discard_chroms
            --discard-chromosomes-pattern '$bam_options.discard_chroms'
        #end if

        ## ADVANCED OPTIONS
        #if $advanced_options.keep_dupes
            --keep-duplicates
        #end if

        #if str($advanced_options.bin_size) != ''
            --bin-size $advanced_options.bin_size
        #end if

        #if str($advanced_options.gaps_allowed) != ''
            --gaps-allowed $advanced_options.gaps_allowed
        #end if

        #if str($advanced_options.fragment_size) != ''
            --fragment-size $advanced_options.fragment_size
        #end if

        #if $advanced_options.original_algorithm
            --original-algorithm
        #end if

        #if $advanced_options.original_stats
            --original-statistics
        #end if

        ## Peaks table goes to stdout; stderr is passed through with empty lines removed.
        > '${peaks}'
        2> >(awk 'NF' >&2)

        ## Optional BED 6+3 conversion: skips the header row, names islands by
        ## row number and writes -log10 of columns 4 and 9 (both set to 500
        ## when column 4 is 0).
        #if $to_bed
            &&
            awk 'NR>1{if ($4==0) {pv=500;qv=500}else{pv=-log($4)/log(10);qv=-log($9)/log(10)};
            print $1,$2,$3,"island_"NR-1,int($5),$6,$10,pv,qv}' OFS="\t" '${peaks}' > '${bed_peaks}'
        #end if

    ]]></command>

    <inputs>
        <!-- Treatment: a single dataset, or several datasets pooled into one run. -->
        <conditional name="treatment">
            <param name="t_multi_select" type="select" label="Are you pooling Treatment Files?" help="">
                <option value="No" selected="True">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="No">
                <param name="input_treatment_file" argument="-t" type="data" format="bam,sam,bed" label="ChIP-Seq Treatment File" help="(-t)" />
            </when>
            <when value="Yes">
                <param name="input_treatment_file" argument="-t" type="data" format="bam,sam,bed" multiple="true" label="ChIP-Seq Treatment File" help="(-t)" />
            </when>
        </conditional>

        <!-- Control: optional; without a control the e-value can be set instead. -->
        <conditional name="control">
            <param name="c_select" type="select" label="Do you have a Control File?">
                <option value="Yes">Yes</option>
                <option value="No" selected="True">No</option>
            </param>
            <when value="Yes">
                <conditional name="c_multiple">
                    <param name="c_multi_select" type="select" label="Are you pooling Control Files?" help="">
                        <option value="No" selected="True">No</option>
                        <option value="Yes">Yes</option>
                    </param>
                    <when value="No">
                        <param name="input_control_file" argument="-c" type="data" format="bam,sam,bed" label="ChIP-Seq Control File" help="(-c)" />
                    </when>
                    <when value="Yes">
                        <param name="input_control_file" argument="-c" type="data" format="bam,sam,bed" multiple="true" label="ChIP-Seq Control File" help="(-c)" />
                    </when>
                </conditional>
            </when>
            <when value="No">
                <param name="evalue" argument="-e" type="integer" optional="True" label="e-value" help="The E-value controls the genome-wide error rate of identified islands under the random background assumption. Should be used when not using a control library. Default 1000." />
            </when>
        </conditional>

        <!-- Genome: built-in epic2 genome, or chromosome sizes from a file / indexed fasta. -->
        <conditional name="genome">
            <param name="g_select" type="select" label="Is your genome indexed?">
                <option value="Yes" selected="True">Yes</option>
                <option value="No">No</option>
            </param>
            <when value="Yes">
                <expand macro="effectiveGenomeSize" />
                <param name="egf" argument="-egf" type="float" min="0" max="1" optional="True" label="Effective genome fraction" help="Use a different effective genome fraction than the one included in epic2, which depends on genome and readlength. (-egf)" />
            </when>
            <when value="No">
                <conditional name="chromsizes">
                    <param name="chr_select" type="select" label="Use an indexed fasta file?" help="Chromosome sizes will be calculated from the provided fasta file.">
                        <option value="No">No</option>
                        <option value="Yes" selected="True">Yes</option>
                    </param>
                    <when value="No">
                        <param name="cs_file" argument="--chromsizes" type="data" format="fasta,txt,tabular,tsv" label="Chromosome sizes" help="Provide a fasta file for automated calculation, or a tab-separated file with two columns: chromosome names and sizes. (--chromsizes)" />
                    </when>
                    <when value="Yes">
                        <param name="builtin_fasta" argument="--chromsizes" type="select" optional="True" label="Genome for fasta file" help="(--chromsizes)">
                            <options from_data_table="fasta_indexes">
                                <filter type="sort_by" column="2" />
                                <validator type="no_options" message="No indexes are available" />
                            </options>
                        </param>
                    </when>
                </conditional>
                <param name="egf" argument="-egf" type="float" min="0" max="1" optional="True" label="Effective genome fraction" help="The effective genome fraction is the proportion of the genome that is mappable, excluding Ns. (-egf)" />
            </when>
        </conditional>

        <param name="fdr" argument="-fdr" type="float" min="0" max="1" optional="True" label="False discovery rate cutoff" help="Remove all islands with an FDR above cutoff. Default 0.05 (-fdr)" />

        <param name="to_bed" type="boolean" checked="false" label="Convert output to BED format?" />

        <section name="bam_options" title="BAM Options">
            <param name="required_flag" argument="--required-flag" type="integer" optional="True" label="Required flag" help="Keep reads with these bits set in flag. Same as `samtools view -f`. Default 0. (--required-flag)" />
            <param name="filter_flag" argument="--filter-flag" type="integer" optional="True" label="Filter flag" help="Discard reads with these bits set in flag. Same as `samtools view -F`. Default 1540 (hex: 0x604). (--filter-flag)" />
            <param name="mapq" argument="--mapq" type="integer" optional="True" label="Mapping quality" help="Discard reads with mapping quality lower than this. Default 5. (--mapq)" />
            <param name="autodetect_chroms" type="boolean" checked="false" truevalue="--autodetect-chroms" falsevalue="" label="Autodetect chromosomes?" help="Autodetect chromosomes from bam file. Use with --discard-chromosomes-pattern flag to avoid non-canonical chromosomes. (--autodetect-chroms)" />
            <param name="discard_chroms" argument="--discard-chromosomes-pattern" type="text" optional="True" label="Discard chromosomes pattern" help="Discard reads from chromosomes matching this pattern. Default '_'. Note that if you are not interested in the results from non-canonical chromosomes, you should ensure they are removed with this flag, otherwise they will make the statistical analysis too stringent. (--discard-chromosomes-pattern)" />
        </section>

        <section name="advanced_options" title="Advanced Options">
            <param name="keep_dupes" type="boolean" checked="false" truevalue="--keep-duplicates" falsevalue="" label="Keep duplicates?" help="Keep reads mapping to the same position on the same strand within a library. (--keep-duplicates)" />
            <param name="bin_size" argument="--bin-size" type="integer" optional="True" label="Bin size" help="Size of the windows to scan the genome. BIN-SIZE is the smallest possible island. Default 200. (--bin-size)" />
            <param name="gaps_allowed" argument="--gaps-allowed" type="integer" optional="True" label="Gaps allowed" help="This number is multiplied by the window size to determine the number of gaps (ineligible windows) allowed between two eligible windows. Default 3. (--gaps-allowed)" />
            <param name="fragment_size" argument="--fragment-size" type="integer" optional="True" label="Fragment size" help="(Single end reads only) Size of the sequenced fragment. Each read is extended half the fragment size from the 5' end. Default 150 (i.e. extend by 75). (--fragment-size)" />
            <param name="original_algorithm" type="boolean" checked="false" truevalue="--original-algorithm" falsevalue="" label="Compute p-values with SICER original algorithm?" help="Use the original SICER algorithm, without the epic2 fix. This will use all reads in your files to compute the p-values, including those falling outside the genome boundaries. (--original-algorithm)" />
            <param name="original_stats" type="boolean" checked="false" truevalue="--original-statistics" falsevalue="" label="Compute statistics with SICER original method?" help="Use the original SICER way of computing the statistics. Like SICER itself, this method raises an error on large datasets. Only included for debugging purposes. (--original-statistics)" />
        </section>
    </inputs>

    <outputs>
        <data format="tabular" name="peaks" label="${tool.name} on ${on_string}" />
        <!-- Only created when the BED conversion was requested. -->
        <data format="bed" name="bed_peaks" label="${tool.name} on ${on_string}: BED">
            <filter>to_bed</filter>
        </data>
    </outputs>

    <tests>
        <!-- BAM treatment and control, built-in genome defaults -->
        <test>
            <param name="input_treatment_file" value="test.bam" ftype="bam" />
            <param name="c_select" value="Yes" />
            <param name="input_control_file" value="control.bam" ftype="bam" />
            <output name="peaks" file="epic2_results.txt" />
        </test>
        <!-- BED treatment and control, built-in genome defaults -->
        <test>
            <param name="input_treatment_file" value="test.bed.gz" ftype="bed" />
            <param name="c_select" value="Yes" />
            <param name="input_control_file" value="control.bed.gz" ftype="bed" />
            <output name="peaks" file="epic2_results1.txt" />
        </test>
        <!-- Custom genome from a chromosome sizes table, original algorithm -->
        <test>
            <param name="input_treatment_file" value="test_ChIP.bam" ftype="bam" />
            <param name="c_select" value="Yes" />
            <param name="input_control_file" value="test_Input.bam" ftype="bam" />
            <param name="g_select" value="No" />
            <param name="chr_select" value="No" />
            <param name="cs_file" value="test_chromsizes.txt" />
            <param name="egf" value="0.99" />
            <param name="original_algorithm" value="Yes" />
            <output name="peaks" file="epic2_results2.txt" />
        </test>
        <!-- Custom genome from a fasta file, BED conversion, tuned mapq/bin size/gaps -->
        <test>
            <param name="input_treatment_file" value="test_ChIP.bam" ftype="bam" />
            <param name="c_select" value="Yes" />
            <param name="input_control_file" value="test_Input.bam" ftype="bam" />
            <param name="g_select" value="No" />
            <param name="chr_select" value="No" />
            <param name="cs_file" value="test_fasta.fasta" />
            <param name="egf" value="0.99" />
            <param name="to_bed" value="Yes" />
            <param name="mapq" value="10" />
            <param name="bin_size" value="100" />
            <param name="gaps_allowed" value="0" />
            <output name="peaks" file="epic2_results3.txt" />
            <output name="bed_peaks" file="epic2_results3.bed" />
        </test>
    </tests>

    <help><![CDATA[
epic2 is an ultraperformant reimplementation of SICER, a ChIP-Seq broad peak/diffuse domain finder.
epic2 is focused on speed, low memory overhead and ease-of-use.
Software documentation may be found at https://github.com/biocore-ntnu/epic2.

**Accepted input**

epic2 is designed to be used with ChIP-Seq data from treatment samples, though control samples are encouraged to increase specificity. Single or multiple files are allowed as input, with same or different file formats (file extension must be bed, bedpe, bam or sam). To use multiple files as input (either as treatment or control samples), group them in a collection and select to pool files, otherwise Galaxy will run them in batch mode.

epic2 works with only a treatment file specified as input. In this case, epic2 will run with default parameters, using the pre-indexed human genome hg19 and an FDR cutoff of 0.05. Several genomes are indexed and included on the installation of epic2, although custom genomes may be used. If your genome is not already indexed, two options are provided. One of them is to select a fasta file from which to calculate chromosome sizes; the second option is to provide a tab-separated list with one chromosome name and length per row. On custom genomes, the effective genome fraction may be introduced (for more information see `deepTools <https://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html>`_).

**Broad peaks format**

The output of epic2 contains called peaks in a table that does not follow any standard format. This table may be converted to BED format, assigning a unique name to each peak (islands). This format follows the standard from ENCODE, BED 6+3, and contains the following columns:

* **1.** Chrom
* **2.** Start
* **3.** End
* **4.** Name
* **5.** Score
* **6.** Strand
* **7.** log2FoldChange
* **8.** -log10PValue
* **9.** -log10FDR

.. class:: warningmark

On columns 8 and 9, the max value is set to 500 when Pvalue == 0.0.

Tool adapted to Galaxy by Miriam Payá.
    ]]></help>
    <expand macro="citations" />
</tool>
416 .. class:: warningmark
417
418 On columns 8 and 9, the max value is set to 500 when Pvalue == 0.0.
419
420 Tool adapted to Galaxy by Miriam PayĆ”.
421
422
423 </help>
424 <expand macro="citations" />
425 </tool>