Mercurial > repos > iuc > pureclip
comparison pureclip.xml @ 0:eb000bccef28 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pureclip commit e2cf796f991cbe8c96e0cc5a0056b7255ac3ad6b
author | iuc |
---|---|
date | Thu, 17 May 2018 14:11:39 -0400 |
parents | |
children | fd1f57782683 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:eb000bccef28 |
---|---|
1 <tool id="pureclip" name="PureCLIP" version="1.0.4"> | |
2 <description>- HMM based peak caller designed for eCLIP/iCLIP data</description> | |
3 <requirements> <!-- Single package dependency; its version (1.0.4) matches the tool's own version attribute --> | |
4 <requirement type="package" version="1.0.4">pureclip</requirement> | |
5 </requirements> | |
6 <command detect_errors="exit_code"><![CDATA[ | |
7 ln -s '${target_bam_file}' target.bam && | |
8 ln -f -s '${target_bam_file.metadata.bam_index}' target.bam.bai && | |
9 ln -s '${genome_fasta_file}' genome.fa && | |
10 #if $control_bam_file: | |
11 ln -s '${control_bam_file}' control.bam && | |
12 ln -f -s '${control_bam_file.metadata.bam_index}' control.bam.bai && | |
13 #end if | |
14 #if $motif_data.motif_data_selector == 'supply_CL_motifs': | |
15 ln -s '${motif_data.cl_motif_bed_file}' motif_hits.bed && | |
16 #end if | |
17 ## Inputs are symlinked to fixed names above. -iv restricts the contigs used to learn HMM parameters, -chr the contigs the HMM is applied to. Optional numeric settings are tested with str(...) != '' so that an explicit 0 is still passed to pureclip. | |
18 pureclip | |
19 -o crosslink_sites.bed | |
20 -or binding_regions.bed | |
21 -i target.bam | |
22 -bai target.bam.bai | |
23 -g genome.fa | |
24 #if $learn_params_contigs | |
25 -iv '$learn_params_contigs' | |
26 #end if | |
27 #if $apply_hmm_contigs | |
28 -chr '$apply_hmm_contigs' | |
29 #end if | |
30 -dm $merge_dist | |
31 #if $control_bam_file: | |
32 -ibam control.bam | |
33 -ibai control.bam.bai | |
34 #end if | |
35 #if $motif_data.motif_data_selector == 'supply_CL_motifs': | |
36 -fis motif_hits.bed | |
37 -nim $motif_data.max_motif_id | |
38 #end if | |
39 #if $bc_data.bc_data_selector == 'bc_0': | |
40 -bc 0 | |
41 #elif $bc_data.bc_data_selector == 'bc_1': | |
42 -bc 1 | |
43 #elif $bc_data.bc_data_selector == 'manual_setting': | |
44 -bw $bc_data.bandwidth | |
45 -bwn $bc_data.bandwidthn | |
46 -b1p $bc_data.b1p | |
47 -b2p $bc_data.b2p | |
48 #if $bc_data.antp_option.antp_option_selector == 'antp_select': | |
49 -antp | |
50 #elif $bc_data.antp_option.antp_option_selector == 'manual_select': | |
51 -ntp $bc_data.antp_option.ntp | |
52 -ntp2 $bc_data.antp_option.ntp2 | |
53 #end if | |
54 #end if | |
55 #if $advanced_params.advanced_params_selector == 'ap_specify': | |
56 $advanced_params.ld_precision | |
57 $advanced_params.use_viterbi | |
58 #if str($advanced_params.max_iter_brent) != '' | |
59 -m $advanced_params.max_iter_brent | |
60 #end if | |
61 #if str($advanced_params.max_iter_bw) != '' | |
62 -w $advanced_params.max_iter_bw | |
63 #end if | |
64 #if str($advanced_params.g1kmin) != '' | |
65 -g1kmin $advanced_params.g1kmin | |
66 #end if | |
67 #if str($advanced_params.g1kmax) != '' | |
68 -g1kmax $advanced_params.g1kmax | |
69 #end if | |
70 #if str($advanced_params.g2kmin) != '' | |
71 -g2kmin $advanced_params.g2kmin | |
72 #end if | |
73 #if str($advanced_params.g2kmax) != '' | |
74 -g2kmax $advanced_params.g2kmax | |
75 #end if | |
76 $advanced_params.fk | |
77 -mkn $advanced_params.mkn | |
78 -mtp $advanced_params.mtp | |
79 #if str($advanced_params.mk) != '' | |
80 -mk $advanced_params.mk | |
81 #end if | |
82 #if str($advanced_params.pa) != '' | |
83 -pa $advanced_params.pa | |
84 #end if | |
85 $advanced_params.ea1 | |
86 $advanced_params.ea2 | |
87 $advanced_params.et1 | |
88 $advanced_params.et2 | |
89 #if str($advanced_params.mrtf) != '' | |
90 -mrtf $advanced_params.mrtf | |
91 #end if | |
92 -mtc $advanced_params.mtc | |
93 -pet $advanced_params.pet | |
94 #end if | |
95 ]]></command> | |
96 <inputs> | |
97 <param name="target_bam_file" type="data" format="bam" label="Target BAM file" argument="-i"/> | |
98 <param name="genome_fasta_file" type="data" format="fasta" label="Genome reference file" argument="-g"/> | |
99 <!-- Options: contig selection, merge distance and optional control BAM --> | |
100 <param name="learn_params_contigs" type="text" optional="True" | |
101 label="Genomic chromosomes to learn HMM parameters" argument="-iv" | |
102 help="Genomic chromosomes to learn HMM parameters, e.g. 'chr1;chr2;chr3'. Contigs have to be in the same order as in BAM file. Useful to reduce runtime and memory consumption. Default: all contigs from reference file are used (useful when applying to transcript-wise alignments or poor data)."> | |
103 <sanitizer> | |
104 <valid initial="string.printable"> | |
105 <remove value="'"/> | |
106 </valid> | |
107 </sanitizer> | |
108 </param> | |
109 <param name="apply_hmm_contigs" type="text" label="Contigs to apply HMM" argument="-chr" optional="True" | |
110 help="Contigs to apply HMM, e.g. 'chr1;chr2;chr3;'. Contigs have to be in the same order as in BAM file."> | |
111 <sanitizer> | |
112 <valid initial="string.printable"> | |
113 <remove value="'"/> | |
114 </valid> | |
115 </sanitizer> | |
116 </param> | |
117 <param name="merge_dist" type="integer" value="8" min="1" | |
118 label="Distance used to merge individual crosslink sites to binding regions" argument="-dm"/> | |
119 <param name="control_bam_file" type="data" format="bam" optional="True" | |
120 label="BAM file containing mapped reads from control experiment" argument="-ibam" | |
121 help="Mapped reads in BAM format from a control experiment, e.g. eCLIP input"/> | |
122 <conditional name="motif_data"> <!-- Optional CL motif covariates; passed to pureclip as -fis / -nim --> | |
123 <param name="motif_data_selector" type="select" label="Crosslink-associated (CL) motif options"> | |
124 <option value="no_CL_motifs_available" selected="true">No CL motifs available</option> | |
125 <option value="supply_CL_motifs">Supply CL motifs</option> | |
126 </param> | |
127 <when value="no_CL_motifs_available" /> | |
128 <when value="supply_CL_motifs"> | |
129 <param name="cl_motif_bed_file" type="data" format="bed" | |
130 label="FIMO input motif score covariates file" argument="-fis" | |
131 help="FIMO input motif score covariates file"/> | |
132 <param name="max_motif_id" type="integer" value="1" | |
133 label="Max. motif ID to use" argument="-nim" | |
134 help="Max. motif ID to use (Default: only covariates with motif ID 1 are used)"/> | |
135 </when> | |
136 </conditional> | |
137 <!-- Binding characteristics: -bc presets, or manual bandwidth / binomial probability / n-threshold settings --> | |
138 <conditional name="bc_data"> | |
139 <param name="bc_data_selector" type="select" label="Define protein binding characteristics"> | |
140 <option value="bc_0" selected="true">RBP with short defined binding regions (-bc 0)</option> | |
141 <option value="bc_1">RBP with larger crosslink clusters and lower read start counts (-bc 1)</option> | |
142 <option value="manual_setting">Manual setting</option> | |
143 </param> | |
144 <when value="bc_0" /> | |
145 <when value="bc_1" /> | |
146 <when value="manual_setting"> | |
147 <param name="bandwidth" type="integer" value="50" min="1" max="500" | |
148 label="Bandwidth for kernel density estimation used to access enrichment" argument="-bw" | |
149 help="NOTE: Increasing the bandwidth increases runtime and memory consumption"/> | |
150 <param name="bandwidthn" type="integer" value="50" min="1" max="500" | |
151 label="Bandwidth for kernel density estimation used to estimate n for binomial distributions" argument="-bwn" | |
152 help="For proteins that rather slide along the RNA or show long crosslink clusters increase -bwn, e.g. to 100 (should be LE 4*bw)"/> | |
153 <param argument="-b1p" type="float" value="0.01" | |
154 label="Initial value for binomial probability parameter of 'non-crosslink' state" /> | |
155 <param argument="-b2p" type="float" value="0.15" | |
156 label="Initial value for binomial probability parameter of 'crosslink' state" /> | |
157 <conditional name="antp_option"> | |
158 <param name="antp_option_selector" type="select" label="Choose n threshold for estimating crosslink state parameters" help="Either automatically choose n threshold (-ntp, -ntp2) to estimate parameters linked to crosslink states based on expected read start count at crosslink sites, or manually set values"> | |
159 <option value="antp_select" selected="true">Automatically choose n threshold (-ntp, -ntp2)</option> | |
160 <option value="manual_select">Manually set -ntp, -ntp2</option> | |
161 </param> | |
162 <when value="antp_select" /> | |
163 <when value="manual_select"> | |
164 <param argument="-ntp" type="integer" value="10" | |
165 label="Only sites with n >= ntp are used to learn binomial probability parameters"/> | |
166 <param argument="-ntp2" type="integer" value="0" | |
167 label="Only sites with n >= ntp2 are used to learn probability of transition from state '2' to '2' or '3'" | |
168 help="Useful for data with low truncation rates at crosslink sites or in general high fraction of non-coinciding read starts"/> | |
169 </when> | |
170 </conditional> | |
171 </when> | |
172 </conditional> | |
173 <conditional name="advanced_params"> <!-- These settings reach the command line only when 'Manually specify' is selected --> | |
174 <param name="advanced_params_selector" type="select" label="Additional advanced parameters"> | |
175 <option value="ap_not_specify" selected="true">Do not specify</option> | |
176 <option value="ap_specify">Manually specify</option> | |
177 </param> | |
178 <when value="ap_not_specify" /> | |
179 <when value="ap_specify"> | |
180 <param name="ld_precision" label="Use higher precision to compute emission probabilities (long double)" type="boolean" | |
181 truevalue="-ld" falsevalue="" checked="False" | |
182 help="Useful in cases of extreme outliers, e.g. extreme high read start counts whose emission probabilities are close to zero and which would be discarded in default setting (along with warning messages). Note: increases memory consumption. Use in combination with '-iv' (default: double)"/> | |
183 <param name="use_viterbi" label="Use Viterbi instead of posterior decoding" | |
184 type="boolean" truevalue="-vtb" falsevalue="" checked="False"/> | |
185 <param name="max_iter_brent" type="integer" optional="True" min="1" max="1000" | |
186 label="Maximum number of iterations within BRENT algorithm" argument="-m"/> | |
187 <param name="max_iter_bw" type="integer" optional="True" min="0" max="500" | |
188 label="Maximum number of iterations within Baum-Welch algorithm" argument="-w"/> | |
189 <param argument="-g1kmin" type="float" optional="True" | |
190 label="Minimum shape k of 'non-enriched' gamma distribution" /> | |
191 <param argument="-g1kmax" type="float" optional="True" | |
192 label="Maximum shape k of 'non-enriched' gamma distribution" /> | |
193 <param argument="-g2kmin" type="float" optional="True" | |
194 label="Minimum shape k of 'enriched' gamma distribution" /> | |
195 <param argument="-g2kmax" type="float" optional="True" | |
196 label="Maximum shape k of 'enriched' gamma distribution" /> | |
197 <param argument="-fk" label="Do not constrain 'non-enriched' shape parameter k" | |
198 type="boolean" truevalue="-fk" falsevalue="" checked="False" | |
199 help="When incorporating input signal, do not constrain 'non-enriched' shape parameter k LE 'enriched' gamma parameter k"/> | |
200 <param argument="-mkn" type="float" value="1.0" min="0.5" max="1.5" | |
201 label="Max. k/N ratio (read start sites/N) used to learn truncation probabilities for 'non-crosslink' and 'crosslink' emission probabilities" | |
202 help="NOTE: high ratios might originate from mapping artifacts that can disturb parameter learning"/> | |
203 <param argument="-mtp" type="float" value="0.0001" | |
204 label="Min. transition probability from state '2' to '3'" | |
205 help="Helpful for poor data, where no clear distinction between 'enriched' and 'non-enriched' is possible"/> | |
206 <param argument="-mk" type="float" optional="True" | |
207 label="Minimum KDE value used for fitting left-truncated gamma distributions" | |
208 help="Default: corresponding to singleton read start."/> | |
209 <param argument="-pa" type="integer" optional="True" | |
210 label="Length threshold for internal poly-X stretches to get excluded" /> | |
211 <param argument="-ea1" label="Exclude intervals containing poly-A stretches from learning" | |
212 type="boolean" truevalue="-ea1" falsevalue="" checked="False"/> | |
213 <param argument="-ea2" label="Exclude intervals containing poly-A stretches from analysis" | |
214 type="boolean" truevalue="-ea2" falsevalue="" checked="False"/> | |
215 <param argument="-et1" label="Exclude intervals containing poly-U stretches from learning" | |
216 type="boolean" truevalue="-et1" falsevalue="" checked="False"/> | |
217 <param argument="-et2" label="Exclude intervals containing poly-U stretches from analysis" | |
218 type="boolean" truevalue="-et2" falsevalue="" checked="False"/> | |
219 <param argument="-mrtf" type="float" optional="True" | |
220 label="Fit gamma shape k only for positions with min. covariate value" /> | |
221 <param argument="-mtc" type="integer" value="250" min="50" max="500" | |
222 label="Maximum number of truncations at one position used for learning" | |
223 help="NOTE: for sites with counts above threshold the whole covered regions will be ignored for learning!"/> | |
224 <param argument="-pet" type="integer" value="7" min="2" max="50" | |
225 label="Prior enrichment threshold" | |
226 help="A KDE threshold corresponding to -pet read start counts at one position will be used for initial classification of 'non-enriched' and 'enriched' site"/> | |
227 </when> | |
228 </conditional> | |
229 <section name="output_options" title="Additional output options"> <!-- Default is declared with 'checked', like the other boolean params of this tool; the outputs filter reads this value --> | |
230 <param name="crosslink_bed_stats" type="boolean" checked="False" label="Output learned parameter statistics file?"/> | |
231 </section> | |
232 </inputs> | |
233 <outputs> <!-- The two BED outputs are the files named by -o / -or in the command; the stats file is kept only when output_options.crosslink_bed_stats is checked --> | |
234 <data format="bed" name="crosslink_bed_outfile" label="${tool.name} on ${on_string} crosslink sites (bed)" from_work_dir="crosslink_sites.bed"/> | |
235 <data format="bed" name="binding_region_bed_outfile" label="${tool.name} on ${on_string} binding regions (bed)" from_work_dir="binding_regions.bed"/> | |
236 <data format="txt" name="crosslink_bed_stats" label="${tool.name} on ${on_string} learned parameter statistics (txt)" from_work_dir="crosslink_sites.bed.stats"> | |
237 <filter>(output_options['crosslink_bed_stats'] is True)</filter> | |
238 </data> | |
239 </outputs> | |
240 <tests> | |
241 <test> <!-- Minimal run: target BAM and reference genome only, stats output enabled --> | |
242 <param name="target_bam_file" value="aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/> | |
243 <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/> | |
244 <param name="crosslink_bed_stats" value="True"/> | |
245 <output name="crosslink_bed_outfile" file="chrM:4000-8300.crosslink_sites.bed"/> | |
246 <output name="binding_region_bed_outfile" file="chrM:4000-8300.binding_regions.bed"/> | |
247 <output name="crosslink_bed_stats" file="chrM:4000-8300.crosslink_sites.bed.stats"/> | |
248 </test> | |
249 <test> <!-- Adds a control BAM, exercising the -ibam / -ibai branch of the command --> | |
250 <param name="target_bam_file" value="aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/> | |
251 <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/> | |
252 <param name="control_bam_file" value="input.aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/> | |
253 <param name="crosslink_bed_stats" value="True"/> | |
254 <output name="crosslink_bed_outfile" file="chrM:4000-8300.crosslink_sites.cov_input_signal.bed"/> | |
255 <output name="binding_region_bed_outfile" file="chrM:4000-8300.binding_regions.cov_input_signal.bed"/> | |
256 <output name="crosslink_bed_stats" file="chrM:4000-8300.crosslink_sites.cov_input_signal.bed.stats"/> | |
257 </test> | |
258 <test> <!-- Supplies a CL motif BED file with max motif ID 4, exercising the -fis / -nim branch --> | |
259 <param name="target_bam_file" value="aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/> | |
260 <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/> | |
261 <param name="motif_data_selector" value="supply_CL_motifs"/> | |
262 <param name="cl_motif_bed_file" value="fimo_clmotif_occurences.chrM:4000-8300.bed" ftype="bed"/> | |
263 <param name="max_motif_id" value="4"/> | |
264 <param name="crosslink_bed_stats" value="True"/> | |
265 <output name="crosslink_bed_outfile" file="chrM:4000-8300.crosslink_sites.cov_CLmotifs.bed"/> | |
266 <output name="binding_region_bed_outfile" file="chrM:4000-8300.binding_regions.cov_CLmotifs.bed"/> | |
267 <output name="crosslink_bed_stats" file="chrM:4000-8300.crosslink_sites.cov_CLmotifs.bed.stats"/> | |
268 </test> | |
269 <test> <!-- Control BAM plus manual binding-characteristic settings (manual -ntp / -ntp2) and advanced parameters (-fk, -mkn 0.9, -mtc 200) --> | |
270 <param name="target_bam_file" value="aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/> | |
271 <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/> | |
272 <param name="control_bam_file" value="input.aligned.prepro.R2.chrM:4000-8300.bam" ftype="bam"/> | |
273 <param name="bc_data_selector" value="manual_setting"/> | |
274 <param name="bandwidthn" value="50"/> | |
275 <param name="b1p" value="0.01"/> | |
276 <param name="b2p" value="0.15"/> | |
277 <param name="antp_option_selector" value="manual_select"/> | |
278 <param name="ntp" value="10"/> | |
279 <param name="ntp2" value="0"/> | |
280 <param name="advanced_params_selector" value="ap_specify"/> | |
281 <param name="fk" value="True"/> | |
282 <param name="mkn" value="0.9"/> | |
283 <param name="mtc" value="200"/> | |
284 <param name="crosslink_bed_stats" value="True"/> | |
285 <output name="crosslink_bed_outfile" file="chrM:4000-8300.crosslink_sites.test4.bed"/> | |
286 <output name="binding_region_bed_outfile" file="chrM:4000-8300.binding_regions.test4.bed"/> | |
287 <output name="crosslink_bed_stats" file="chrM:4000-8300.crosslink_sites.test4.bed.stats"/> | |
288 </test> | |
289 </tests> | |
290 <help><![CDATA[ | |
291 | |
292 PureCLIP is a tool to detect protein-RNA interaction footprints from single-nucleotide CLIP-seq data, such as iCLIP and eCLIP. It accepts mapped eCLIP/iCLIP reads in BAM format as input and also supports control library and crosslink-associated (CL) motifs input for bias correction. | |
293 | |
294 PureCLIP outputs two BED files, containing the found crosslink sites (first file) and binding regions (second file) that merge nearby crosslink sites to contiguous regions (region width controlled by -dm parameter). | |
295 | |
296 By default, the tool parameters are set to values optimized for proteins binding to short defined binding regions, e.g. proteins binding to short specific motifs such as PUM2 and RBFOX2. This behaviour can be changed with the -bc option. The default setting -bc 0 is equivalent to manually setting -bdwn 50 -ntp 10 -ntp2 0 -b1p 0.01 -b2p 0.15. The second setting -bc 1 is designed for RBPs that produce larger clusters (proteins causing larger crosslink clusters with relatively lower read start counts, e.g. proteins binding to low complexity motifs). -bc 1 corresponds to the manual setting -bdwn 100 -antp -b1p 0.01 -b2p 0.1. | |
297 | |
298 In case of different binding characteristics, you can manually adjust parameters -bdw, -bdwn, -b1p, -b2p, -antp or have a look at the online documentation for more details: | |
299 | |
300 http://pureclip.readthedocs.io/en/latest/index.html | |
301 | |
302 ]]></help> | |
303 <citations> <!-- DOI reference presented to users as the citation for this tool --> | |
304 <citation type="doi">10.1186/s13059-017-1364-2</citation> | |
305 </citations> | |
306 </tool> |