comparison pureclip.xml @ 0:eb000bccef28 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pureclip commit e2cf796f991cbe8c96e0cc5a0056b7255ac3ad6b
author iuc
date Thu, 17 May 2018 14:11:39 -0400
parents
children fd1f57782683
comparison
equal deleted inserted replaced
-1:000000000000 0:eb000bccef28
<tool id="pureclip" name="PureCLIP" version="1.0.4">
    <!--
        Galaxy wrapper around the PureCLIP command line tool.
        The command template stages the inputs under fixed names in the job
        working directory, runs pureclip once, and the outputs are collected
        from the working directory by file name (see the outputs section).
    -->
    <description>- HMM based peak caller designed for eCLIP/iCLIP data</description>
    <requirements>
        <requirement type="package" version="1.0.4">pureclip</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage inputs under fixed names so that each BAM sits next to its index.
        ln -s '${target_bam_file}' target.bam &&
        ln -f -s '${target_bam_file.metadata.bam_index}' target.bam.bai &&
        ln -s '${genome_fasta_file}' genome.fa &&
        #if $control_bam_file:
            ln -s '${control_bam_file}' control.bam &&
            ln -f -s '${control_bam_file.metadata.bam_index}' control.bam.bai &&
        #end if
        #if $motif_data.motif_data_selector == 'supply_CL_motifs':
            ln -s '${motif_data.cl_motif_bed_file}' motif_hits.bed &&
        #end if

        pureclip
        -o crosslink_sites.bed
        -or binding_regions.bed
        -i target.bam
        -bai target.bam.bai
        -g genome.fa
        ## Contigs used for learning the HMM parameters.
        #if $learn_params_contigs
            -iv '$learn_params_contigs'
        #end if
        ## Contigs the HMM is applied to. This must be -chr (as declared on the
        ## parameter); it was previously emitted as a second -iv.
        #if $apply_hmm_contigs
            -chr '$apply_hmm_contigs'
        #end if
        -dm $merge_dist
        #if $control_bam_file:
            -ibam control.bam
            -ibai control.bam.bai
        #end if
        #if $motif_data.motif_data_selector == 'supply_CL_motifs':
            -fis motif_hits.bed
            -nim $motif_data.max_motif_id
        #end if
        ## Binding characteristics: one of the two presets, or every value set manually.
        #if $bc_data.bc_data_selector == 'bc_0':
            -bc 0
        #elif $bc_data.bc_data_selector == 'bc_1':
            -bc 1
        #elif $bc_data.bc_data_selector == 'manual_setting':
            -bw $bc_data.bandwidth
            -bwn $bc_data.bandwidthn
            -b1p $bc_data.b1p
            -b2p $bc_data.b2p
            #if $bc_data.antp_option.antp_option_selector == 'antp_select':
                -antp
            #elif $bc_data.antp_option.antp_option_selector == 'manual_select':
                -ntp $bc_data.antp_option.ntp
                -ntp2 $bc_data.antp_option.ntp2
            #end if
        #end if
        #if $advanced_params.advanced_params_selector == 'ap_specify':
            ## Boolean parameters expand to their flag or to an empty string.
            $advanced_params.ld_precision
            $advanced_params.use_viterbi
            ## Optional numeric parameters are compared against the empty string
            ## rather than tested for truthiness, so an explicit 0 is still passed on.
            #if str($advanced_params.max_iter_brent) != ''
                -m $advanced_params.max_iter_brent
            #end if
            #if str($advanced_params.max_iter_bw) != ''
                -w $advanced_params.max_iter_bw
            #end if
            #if str($advanced_params.g1kmin) != ''
                -g1kmin $advanced_params.g1kmin
            #end if
            #if str($advanced_params.g1kmax) != ''
                -g1kmax $advanced_params.g1kmax
            #end if
            #if str($advanced_params.g2kmin) != ''
                -g2kmin $advanced_params.g2kmin
            #end if
            #if str($advanced_params.g2kmax) != ''
                -g2kmax $advanced_params.g2kmax
            #end if
            $advanced_params.fk
            -mkn $advanced_params.mkn
            -mtp $advanced_params.mtp
            #if str($advanced_params.mk) != ''
                -mk $advanced_params.mk
            #end if
            #if str($advanced_params.pa) != ''
                -pa $advanced_params.pa
            #end if
            $advanced_params.ea1
            $advanced_params.ea2
            $advanced_params.et1
            $advanced_params.et2
            #if str($advanced_params.mrtf) != ''
                -mrtf $advanced_params.mrtf
            #end if
            -mtc $advanced_params.mtc
            -pet $advanced_params.pet
        #end if
    ]]></command>
    <inputs>
        <!-- Required inputs -->
        <param name="target_bam_file" type="data" format="bam" label="Target BAM file" argument="-i"/>
        <param name="genome_fasta_file" type="data" format="fasta" label="Genome reference file" argument="-g"/>
        <!-- Options -->
        <!-- Both contig lists are single-quoted in the command, so the single quote is stripped from user input. -->
        <param name="learn_params_contigs" type="text" optional="True"
               label="Genomic chromosomes to learn HMM parameters" argument="-iv"
               help="Genomic chromosomes to learn HMM parameters, e.g. 'chr1;chr2;chr3'. Contigs have to be in the same order as in BAM file. Useful to reduce runtime and memory consumption. Default: all contigs from reference file are used (useful when applying to transcript-wise alignments or poor data).">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
            </sanitizer>
        </param>
        <param name="apply_hmm_contigs" type="text" label="Contigs to apply HMM" argument="-chr" optional="True"
               help="Contigs to apply HMM, e.g. 'chr1;chr2;chr3;'. Contigs have to be in the same order as in BAM file.">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
            </sanitizer>
        </param>
        <param name="merge_dist" type="integer" value="8" min="1"
               label="Distance used to merge individual crosslink sites to binding regions" argument="-dm"/>
        <param name="control_bam_file" type="data" format="bam" optional="True"
               label="BAM file containing mapped reads from control experiment" argument="-ibam"
               help="Mapped reads in BAM format from a control experiment, e.g. eCLIP input"/>
        <!-- Optional crosslink-associated motif covariates -->
        <conditional name="motif_data">
            <param name="motif_data_selector" type="select" label="Crosslink-associated (CL) motif options">
                <option value="no_CL_motifs_available" selected="true">No CL motifs available</option>
                <option value="supply_CL_motifs">Supply CL motifs</option>
            </param>
            <when value="no_CL_motifs_available" />
            <when value="supply_CL_motifs">
                <param name="cl_motif_bed_file" type="data" format="bed"
                       label="FIMO input motif score covariates file" argument="-fis"
                       help="FIMO input motif score covariates file"/>
                <param name="max_motif_id" type="integer" value="1"
                       label="Max. motif ID to use" argument="-nim"
                       help="Max. motif ID to use (Default: only covariates with motif ID 1 are used)"/>
            </when>
        </conditional>

        <!-- Binding characteristics: preset (-bc 0 / -bc 1) or fully manual -->
        <conditional name="bc_data">
            <param name="bc_data_selector" type="select" label="Define protein binding characteristics">
                <option value="bc_0" selected="true">RBP with short defined binding regions (-bc 0)</option>
                <option value="bc_1">RBP with larger crosslink clusters and lower read start counts (-bc 1)</option>
                <option value="manual_setting">Manual setting</option>
            </param>
            <when value="bc_0" />
            <when value="bc_1" />
            <when value="manual_setting">
                <param name="bandwidth" type="integer" value="50" min="1" max="500"
                       label="Bandwidth for kernel density estimation used to access enrichment" argument="-bw"
                       help="NOTE: Increasing the bandwidth increases runtime and memory consumption"/>
                <param name="bandwidthn" type="integer" value="50" min="1" max="500"
                       label="Bandwidth for kernel density estimation used to estimate n for binomial distributions" argument="-bwn"
                       help="For proteins that rather slide along the RNA or show long crosslink clusters increase -bwn, e.g. to 100 (should be LE 4*bw)"/>
                <param argument="-b1p" type="float" value="0.01"
                       label="Initial value for binomial probability parameter of 'non-crosslink' state" />
                <param argument="-b2p" type="float" value="0.15"
                       label="Initial value for binomial probability parameter of 'crosslink' state" />
                <conditional name="antp_option">
                    <param name="antp_option_selector" type="select" label="Choose n threshold for estimating crosslink state parameters" help="Either automatically choose n threshold (-ntp, -ntp2) to estimate parameters linked to crosslink states based on expected read start count at crosslink sites, or manually set values">
                        <option value="antp_select" selected="true">Automatically choose n threshold (-ntp, -ntp2)</option>
                        <option value="manual_select">Manually set -ntp, -ntp2</option>
                    </param>
                    <when value="antp_select" />
                    <when value="manual_select">
                        <param argument="-ntp" type="integer" value="10"
                               label="Only sites with n >= ntp are used to learn binomial probability parameters"/>
                        <param argument="-ntp2" type="integer" value="0"
                               label="Only sites with n >= ntp2 are used to learn probability of transition from state '2' to '2' or '3'"
                               help="Useful for data with low truncation rates at crosslink sites or in general high fraction of non-coinciding read starts"/>
                    </when>
                </conditional>
            </when>
        </conditional>
        <!-- Advanced parameters: only emitted on the command line when "Manually specify" is selected -->
        <conditional name="advanced_params">
            <param name="advanced_params_selector" type="select" label="Additional advanced parameters">
                <option value="ap_not_specify" selected="true">Do not specify</option>
                <option value="ap_specify">Manually specify</option>
            </param>
            <when value="ap_not_specify" />
            <when value="ap_specify">
                <param name="ld_precision" label="Use higher precision to compute emission probabilities (long double)" type="boolean"
                       truevalue="-ld" falsevalue="" checked="False"
                       help="Useful in cases of extreme outliers, e.g. extreme high read start counts whose emission probabilities are close to zero and which would be discarded in default setting (along with warning messages). Note: increases memory consumption. Use in combination with '-iv' (default: double)"/>
                <param name="use_viterbi" label="Use Viterbi instead of posterior decoding"
                       type="boolean" truevalue="-vtb" falsevalue="" checked="False"/>
                <param name="max_iter_brent" type="integer" optional="True" min="1" max="1000"
                       label="Maximum number of iterations within BRENT algorithm" argument="-m"/>
                <param name="max_iter_bw" type="integer" optional="True" min="0" max="500"
                       label="Maximum number of iterations within Baum-Welch algorithm" argument="-w"/>
                <param argument="-g1kmin" type="float" optional="True"
                       label="Minimum shape k of 'non-enriched' gamma distribution" />
                <param argument="-g1kmax" type="float" optional="True"
                       label="Maximum shape k of 'non-enriched' gamma distribution" />
                <param argument="-g2kmin" type="float" optional="True"
                       label="Minimum shape k of 'enriched' gamma distribution" />
                <param argument="-g2kmax" type="float" optional="True"
                       label="Maximum shape k of 'enriched' gamma distribution" />
                <param argument="-fk" label="Do not constrain 'non-enriched' shape parameter k"
                       type="boolean" truevalue="-fk" falsevalue="" checked="False"
                       help="When incorporating input signal, do not constrain 'non-enriched' shape parameter k LE 'enriched' gamma parameter k"/>
                <param argument="-mkn" type="float" value="1.0" min="0.5" max="1.5"
                       label="Max. k/N ratio (read start sites/N) used to learn truncation probabilities for 'non-crosslink' and 'crosslink' emission probabilities"
                       help="NOTE: high ratios might originate from mapping artifacts that can disturb parameter learning"/>
                <param argument="-mtp" type="float" value="0.0001"
                       label="Min. transition probability from state '2' to '3'"
                       help="Helpful for poor data, where no clear distinction between 'enriched' and 'non-enriched' is possible"/>
                <param argument="-mk" type="float" optional="True"
                       label="Minimum KDE value used for fitting left-truncated gamma distributions"
                       help="Default: corresponding to singleton read start."/>
                <param argument="-pa" type="integer" optional="True"
                       label="Length threshold for internal poly-X stretches to get excluded" />
                <param argument="-ea1" label="Exclude intervals containing poly-A stretches from learning"
                       type="boolean" truevalue="-ea1" falsevalue="" checked="False"/>
                <param argument="-ea2" label="Exclude intervals containing poly-A stretches from analysis"
                       type="boolean" truevalue="-ea2" falsevalue="" checked="False"/>
                <param argument="-et1" label="Exclude intervals containing poly-U stretches from learning"
                       type="boolean" truevalue="-et1" falsevalue="" checked="False"/>
                <param argument="-et2" label="Exclude intervals containing poly-U stretches from analysis"
                       type="boolean" truevalue="-et2" falsevalue="" checked="False"/>
                <param argument="-mrtf" type="float" optional="True"
                       label="Fit gamma shape k only for positions with min. covariate value" />
                <param argument="-mtc" type="integer" value="250" min="50" max="500"
                       label="Maximum number of truncations at one position used for learning"
                       help="NOTE: for sites with counts above threshold the whole covered regions will be ignored for learning!"/>
                <param argument="-pet" type="integer" value="7" min="2" max="50"
                       label="Prior enrichment threshold"
                       help="A KDE threshold corresponding to -pet read start counts at one position will be used for initial classification of 'non-enriched' and 'enriched' site"/>
            </when>
        </conditional>
        <!-- This switch adds no command line flag; it only controls whether the stats output below is kept. -->
        <section name="output_options" title="Additional output options">
            <param name="crosslink_bed_stats" type="boolean" checked="false" label="Output learned parameter statistics file?"/>
        </section>
    </inputs>
    <outputs>
        <!-- All outputs are picked up from the job working directory by the names used in the command. -->
        <data format="bed" name="crosslink_bed_outfile" label="${tool.name} on ${on_string} crosslink sites (bed)" from_work_dir="crosslink_sites.bed"/>
        <data format="bed" name="binding_region_bed_outfile" label="${tool.name} on ${on_string} binding regions (bed)" from_work_dir="binding_regions.bed"/>
        <data format="txt" name="crosslink_bed_stats" label="${tool.name} on ${on_string} learned parameter statistics (txt)" from_work_dir="crosslink_sites.bed.stats">
            <filter>(output_options['crosslink_bed_stats'] is True)</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: target BAM and genome only, default settings -->
        <test>
            <param name="target_bam_file" value="aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/>
            <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/>
            <param name="crosslink_bed_stats" value="True"/>
            <output name="crosslink_bed_outfile" file="chrM:4000-8300.crosslink_sites.bed"/>
            <output name="binding_region_bed_outfile" file="chrM:4000-8300.binding_regions.bed"/>
            <output name="crosslink_bed_stats" file="chrM:4000-8300.crosslink_sites.bed.stats"/>
        </test>
        <!-- Test 2: with a control BAM -->
        <test>
            <param name="target_bam_file" value="aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/>
            <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/>
            <param name="control_bam_file" value="input.aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/>
            <param name="crosslink_bed_stats" value="True"/>
            <output name="crosslink_bed_outfile" file="chrM:4000-8300.crosslink_sites.cov_input_signal.bed"/>
            <output name="binding_region_bed_outfile" file="chrM:4000-8300.binding_regions.cov_input_signal.bed"/>
            <output name="crosslink_bed_stats" file="chrM:4000-8300.crosslink_sites.cov_input_signal.bed.stats"/>
        </test>
        <!-- Test 3: with CL motif covariates -->
        <test>
            <param name="target_bam_file" value="aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/>
            <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/>
            <param name="motif_data_selector" value="supply_CL_motifs"/>
            <param name="cl_motif_bed_file" value="fimo_clmotif_occurences.chrM:4000-8300.bed" ftype="bed"/>
            <param name="max_motif_id" value="4"/>
            <param name="crosslink_bed_stats" value="True"/>
            <output name="crosslink_bed_outfile" file="chrM:4000-8300.crosslink_sites.cov_CLmotifs.bed"/>
            <output name="binding_region_bed_outfile" file="chrM:4000-8300.binding_regions.cov_CLmotifs.bed"/>
            <output name="crosslink_bed_stats" file="chrM:4000-8300.crosslink_sites.cov_CLmotifs.bed.stats"/>
        </test>
        <!-- Test 4: control BAM plus manual binding characteristics and advanced parameters -->
        <test>
            <param name="target_bam_file" value="aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/>
            <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/>
            <param name="control_bam_file" value="input.aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/>
            <param name="bc_data_selector" value="manual_setting"/>
            <param name="bandwidthn" value="50"/>
            <param name="b1p" value="0.01"/>
            <param name="b2p" value="0.15"/>
            <param name="antp_option_selector" value="manual_select"/>
            <param name="ntp" value="10"/>
            <param name="ntp2" value="0"/>
            <param name="advanced_params_selector" value="ap_specify"/>
            <param name="fk" value="True"/>
            <param name="mkn" value="0.9"/>
            <param name="mtc" value="200"/>
            <param name="crosslink_bed_stats" value="True"/>
            <output name="crosslink_bed_outfile" file="chrM:4000-8300.crosslink_sites.test4.bed"/>
            <output name="binding_region_bed_outfile" file="chrM:4000-8300.binding_regions.test4.bed"/>
            <output name="crosslink_bed_stats" file="chrM:4000-8300.crosslink_sites.test4.bed.stats"/>
        </test>
    </tests>
    <help><![CDATA[

PureCLIP is a tool to detect protein-RNA interaction footprints from single-nucleotide CLIP-seq data, such as iCLIP and eCLIP. It accepts mapped eCLIP/iCLIP reads in BAM format as input and also supports control library and crosslink-associated (CL) motifs input for bias correction.

PureCLIP outputs two BED files, containing the found crosslink sites (first file) and binding regions (second file) that merge nearby crosslink sites to contiguous regions (region width controlled by -dm parameter).

By default, the tool parameters are set to values optimized for proteins binding to short defined binding regions, e.g. proteins binding to short specific motifs such as PUM2 and RBFOX2. This behaviour can be changed with the -bc option. The default setting -bc 0 is equivalent to manually setting -bdwn 50 -ntp 10 -ntp2 0 -b1p 0.01 -b2p 0.15. The second setting -bc 1 is designed for RBPs that produce larger clusters (proteins causing larger crosslink clusters with relatively lower read start counts, e.g. proteins binding to low complexity motifs). -bc 1 corresponds to the manual setting -bdwn 100 -antp -b1p 0.01 -b2p 0.1.

In case of different binding characteristics, you can manually adjust parameters -bdw, -bdwn, -b1p, -b2p, -antp or have a look at the online documentation for more details:

http://pureclip.readthedocs.io/en/latest/index.html

    ]]></help>
    <citations>
        <citation type="doi">10.1186/s13059-017-1364-2</citation>
    </citations>
</tool>