comparison rnachipintegrator_canonical_genes.xml @ 0:d9c1f2133124 draft

Uploaded initial version 0.4.4.
author pjbriggs
date Tue, 30 Jun 2015 06:44:06 -0400
parents
children 5f69a2c1b9c9
comparison
equal deleted inserted replaced
-1:000000000000 0:d9c1f2133124
1 <tool id="rnachipintegrator_canonical_genes" name="Analyse canonical genes against ChIP data" version="@VERSION@-0">
2 <description>using RnaChipIntegrator</description>
3 <macros>
4 <import>rnachipintegrator_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_command" />
8 <command interpreter="bash">rnachipintegrator_wrapper.sh
9 #if str( $analysis_options.peak_type ) == "summits"
10 #if str( $analysis_options.window ) != ""
11 --window=$analysis_options.window
12 #end if
13 #if str( $analysis_options.cutoff ) != ""
14 --cutoff=$analysis_options.cutoff
15 #end if
16 #end if
17 #if str( $analysis_options.peak_type ) == "regions"
18 #if str( $analysis_options.edge_cutoff ) != ""
19 --edge-cutoff=$analysis_options.edge_cutoff
20 #end if
21 #if str( $analysis_options.number ) != ""
22 --number=$analysis_options.number
23 #end if
24 #if (str( $analysis_options.promoter_start ) != "" and str( $analysis_options.promoter_end ) != "")
25 --promoter_region=$analysis_options.promoter_start,$analysis_options.promoter_end
26 #end if
27 #if $analysis_options.pad_output
28 --pad
29 #end if
30 #end if
31 ${canonical_genes.fields.path} $chipseq
32 --output_xls $xls_output
33 #if $results_as_zip
34 --zip_file $zip_file
35 #else
36 #if str( $analysis_options.peak_type ) == "summits"
37 --summit_outputs $peaks_to_transcripts_out $tss_to_summits_out
38 #end if
39 #if str( $analysis_options.peak_type ) == "regions"
40 --peak_outputs $transcripts_to_edges_out
41 $transcripts_to_edges_summary
42 $tss_to_edges_out
43 $tss_to_edges_summary
44 #end if
45 #end if
46 </command>
47 <inputs>
48 <param format="tabular" name="chipseq" type="data" label="ChIP peaks data file" />
49 <param name="canonical_genes" type="select" label="Canonical genes to analyse ChIP peaks against">
50 <options from_data_table="rnachipintegrator_canonical_genes">
51 </options>
52 </param>
53 <conditional name="analysis_options">
54 <!-- user must specify if ChIP peaks are summits or regions -->
55 <param name="peak_type" type="select" label="ChIP peaks are"
56 help="Options and outputs depend on whether ChIP data are summits or regions">
57 <option value="summits">summits</option>
58 <option value="regions">regions</option>
59 </param>
60 <when value="summits">
61 <param name="window" type="integer" value="20000" optional="true"
62 label="Maximum distance a peak can be from each transcript
63 TSS before being omitted from analysis" />
64 <param name="cutoff" type="integer" value="130000" optional="true"
65 label="Maximum distance a transcript TSS can be from each
66 peak before being omitted from the analysis" />
67 </when>
68 <when value="regions">
69 <param name="edge_cutoff" type="integer" value="10000" optional="true"
70 label="Maximum distance a transcript edge can be from the
71 peak edge before being omitted from the analysis"
72 help="Set to zero to indicate that no cut off should be applied" />
73 <param name="number" type="integer" value="4" optional="true"
74 label="Maximum number of transcripts per peak to report
75 from the analysis" />
76 <param name="promoter_start" type="integer" value="-10000" optional="true"
77 label="Start of promoter region with respect to gene TSS" />
78 <param name="promoter_end" type="integer" value="2500" optional="true"
79 label="End of promoter region with respect to gene TSS" />
80 <param name="pad_output" type="boolean" checked="false" truevalue="yes"
81 label="Output same number of lines for each peak (--pad)"
82 help="Add blank lines in output for peaks with fewer than maximum number
83 of hits" />
84 </when>
85 </conditional>
86 <param name="results_as_zip" type="boolean" checked="false" truevalue="yes"
87 label="Put output tab-delimited files into a single zip archive" />
88 </inputs>
89 <outputs>
90 <!-- Always produce XLS output -->
91 <data format="xls" name="xls_output"
92 label="All RnaChipIntegrator analyses for ${canonical_genes.fields.name} vs ${chipseq.name} (Excel spreadsheet)" />
93 <!-- Outputs only produced for summit data -->
94 <data format="tabular" name="peaks_to_transcripts_out"
95 label="Nearest summits to transcripts for ${canonical_genes.fields.name} vs ${chipseq.name}" >
96 <filter>analysis_options['peak_type'] == "summits"</filter>
97 <filter>results_as_zip is False</filter>
98 </data>
99 <data format="tabular" name="tss_to_summits_out"
100 label="Nearest summits to TSS for ${canonical_genes.fields.name} vs ${chipseq.name}" >
101 <filter>analysis_options['peak_type'] == "summits"</filter>
102 <filter>results_as_zip is False</filter>
103 </data>
104 <!-- Outputs only produced for region data -->
105 <data format="tabular" name="transcripts_to_edges_out"
106 label="Nearest transcripts to peak edges for ${canonical_genes.fields.name} vs ${chipseq.name}" >
107 <filter>analysis_options['peak_type'] == "regions"</filter>
108 <filter>results_as_zip is False</filter>
109 </data>
110 <data format="tabular" name="transcripts_to_edges_summary"
111 label="Nearest transcripts to peak edges (summary) for ${canonical_genes.fields.name} vs ${chipseq.name}" >
112 <filter>analysis_options['peak_type'] == "regions"</filter>
113 <filter>results_as_zip is False</filter>
114 </data>
115 <data format="tabular" name="tss_to_edges_out"
116 label="Nearest TSS to peak edges for ${canonical_genes.fields.name} vs ${chipseq.name}" >
117 <filter>analysis_options['peak_type'] == "regions"</filter>
118 <filter>results_as_zip is False</filter>
119 </data>
120 <data format="tabular" name="tss_to_edges_summary"
121 label="Nearest TSS to peak edges (summary) for ${canonical_genes.fields.name} vs ${chipseq.name}" >
122 <filter>analysis_options['peak_type'] == "regions"</filter>
123 <filter>results_as_zip is False</filter>
124 </data>
125 <data format="zip" name="zip_file"
126 label="All tab-delimited files for ${canonical_genes.fields.name} vs ${chipseq.name} (zip file)" >
127 <filter>results_as_zip is True</filter>
128 </data>
129 </outputs>
130 <tests>
131 <test>
132 <param name="chipseq" value="mm9_summits.txt" />
133 <param name="canonical_genes" value="mm9_test" />
134 <param name="peak_type" value="summits" />
135 <param name="window" value="50000" />
136 <param name="cutoff" value="130000" />
137 <output name="xls_output" file="mm9_summits.xls" compare="sim_size" />
138 <output name="peaks_to_transcripts_out" file="mm9_summits_to_transcripts.out" ftype="tabular" />
139 <output name="tss_to_summits_out" file="mm9_tss_to_summits.out" ftype="tabular" />
140 </test>
141 <test>
142 <param name="chipseq" value="mm9_peaks.txt" />
143 <param name="canonical_genes" value="mm9_test" />
144 <param name="peak_type" value="regions" />
145 <param name="edge_cutoff" value="50000" />
146 <output name="xls_output" file="mm9_peaks.xls" compare="sim_size" />
147 <output name="transcripts_to_edges_out" file="mm9_transcripts_to_edges.out" ftype="tabular" />
148 <output name="transcripts_to_edges_summary" file="mm9_transcripts_to_edges.summary" ftype="tabular" />
149 <output name="tss_to_edges_out" file="mm9_tss_to_edges.out" ftype="tabular" />
150 <output name="tss_to_edges_summary" file="mm9_tss_to_edges.summary" ftype="tabular" />
151 </test>
152 </tests>
153 <help>
154
155 .. class:: infomark
156
157 **What it does**
158
159 Run RnaChipIntegrator to analyse ChIP data against a set of list of "canonical
160 genes" for a specific organism/genome build, identifying the nearest ChIP peaks
161 to each cannonical gene (vice versa).
162
163 For ChIP peaks defined as regions the following analyses are performed:
164
165 * **TranscriptsToPeakEdges**: reports the nearest transcripts with the smallest
166 distance from either their TSS or TES to the nearest peak edge.
167
168 * **TSSToPeakEdges**: reports the nearest transcripts with the smallest distance
169 from their TSS to the nearest peak edge.
170
171 For ChIP peaks defined as summits:
172
173 * **TSSToSummits**: reports the nearest transcripts with the smallest distance
174 from the TSS to the nearest peak summit.
175
176 * **PeaksToTranscripts**: reports the nearest peak summits with the smallest
177 distance to either the TSS or TES of each transcript.
178
179 RnaChipIntgerator can be obtained from
180 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
181
182 -------------
183
184 .. class:: infomark
185
186 **Input**
187
188 The ChIP-seq data must be in a tab-delimited file with 3 columns of data for each
189 ChIP peak (one per line):
190
191 ====== ========== ======================================================================
192 Column Name       Description
193 ====== ========== ======================================================================
194 1      chr        Chromosome name (must match one of those in expression data file)
195 2      start      Start position of the peak
196 3      end        End position of the peak (start + 1 for summit data)
197 ====== ========== ======================================================================
198
199 The ChIP peak data can be either the summit (in which case 'end' - 'start' = 1) or the
200 entire extent of the binding region (with 'start' and 'end' indicating the limits).
201
202 -------------
203
204 .. class:: infomark
205
206 **Output**
207
208 The outputs from this tool vary depending on the type of ChIP data that is input (i.e
209 summits or peaks), however generally there is one tab-delimited results file for each
210 analysis described above in the **What it does** section (some analyses output a second
211 file with just the "best" hits).
212
213 A history item will be generated for each output file, unless the option to put them
214 into a single zip archive is selected; this archive file will have to be downloaded
215 and unzipped on your local machine. It is recommended that you refer to the
216 RnaChipIntegrator documentation for information on the contents of each output file:
217 https://github.com/fls-bioinformatics-core/RnaChipIntegrator/blob/master/doc/MANUAL.markdown
218
219 In addition an Excel spreadsheet (with one page for each analysis performed) is always
220 produced.
221
222 -------------
223
224 .. class:: infomark
225
226 **Credits**
227
228 This Galaxy tool has been developed within the Bioinformatics Core Facility at the
229 University of Manchester. It runs the RnaChipIntegrator package which has also been
230 developed by this group, and is documented at
231 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
232
233 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
234 </help>
235 </tool>