0
|
1 <?xml version="1.0" encoding="utf-8"?>
|
|
2 <tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@-0">
|
|
3 <description>Integrated analysis of gene expression data and ChIP data</description>
|
|
4 <macros>
|
|
5 <import>rnachipintegrator_macros.xml</import>
|
|
6 </macros>
|
|
7 <expand macro="requirements" />
|
|
8 <expand macro="version_command" />
|
|
<!-- Command line for rnachipintegrator_wrapper.sh: analysis options first,
     then the two input datasets, then the output destinations. Lines that
     start with a double hash are Cheetah comments and are not emitted. -->
<command interpreter="bash">rnachipintegrator_wrapper.sh
  ## Options that only apply when the ChIP peaks are summits; each optional
  ## integer is passed only if the user supplied a value.
  #if str( $analysis_options.peak_type ) == "summits"
    #if str( $analysis_options.window ) != ""
      --window=$analysis_options.window
    #end if
    #if str( $analysis_options.cutoff ) != ""
      --cutoff=$analysis_options.cutoff
    #end if
  #end if
  ## Options that only apply when the ChIP peaks are regions
  #if str( $analysis_options.peak_type ) == "regions"
    #if str( $analysis_options.edge_cutoff ) != ""
      --edge-cutoff=$analysis_options.edge_cutoff
    #end if
    #if str( $analysis_options.number ) != ""
      --number=$analysis_options.number
    #end if
    ## The promoter region needs both limits; test both the same way as the
    ## other optional values so the guard reads consistently.
    #if str( $analysis_options.promoter_start ) != "" and str( $analysis_options.promoter_end ) != ""
      --promoter_region=$analysis_options.promoter_start,$analysis_options.promoter_end
    #end if
    #if $analysis_options.pad_output
      --pad
    #end if
  #end if
  ## Input datasets (expression data first, then ChIP peaks). Paths are
  ## single-quoted so each one reaches the wrapper as exactly one argument.
  '$rnaseq' '$chipseq'
  ## The Excel output is requested on every run
  --output_xls '$xls_output'
  #if $results_as_zip
    ## Tab-delimited results are collected into one archive
    --zip_file '$zip_file'
  #else
    ## Otherwise each tab-delimited result gets its own output dataset; the
    ## set of files depends on the peak type.
    #if str( $analysis_options.peak_type ) == "summits"
      --summit_outputs '$peaks_to_transcripts_out' '$tss_to_summits_out'
    #end if
    #if str( $analysis_options.peak_type ) == "regions"
      --peak_outputs '$transcripts_to_edges_out'
      '$transcripts_to_edges_summary'
      '$tss_to_edges_out'
      '$tss_to_edges_summary'
    #end if
  #end if
</command>
|
|
<inputs>
  <!-- The two tabular datasets handed to the wrapper script -->
  <param name="rnaseq" type="data" format="tabular" label="Gene expression data file" />
  <param name="chipseq" type="data" format="tabular" label="ChIP peaks data file" />
  <conditional name="analysis_options">
    <!-- user must specify if ChIP peaks are summits or regions -->
    <param name="peak_type" type="select" label="ChIP peaks are"
           help="Options and outputs depend on whether ChIP data are summits or regions">
      <option value="summits">summits</option>
      <option value="regions">regions</option>
    </param>
    <!-- Labels are kept on a single line: a line break inside an attribute
         value is turned into spaces by the XML parser. -->
    <when value="summits">
      <param name="window" type="integer" value="20000" optional="true"
             label="Maximum distance a peak can be from each transcript TSS before being omitted from analysis" />
      <param name="cutoff" type="integer" value="130000" optional="true"
             label="Maximum distance a transcript TSS can be from each peak before being omitted from the analysis" />
    </when>
    <when value="regions">
      <param name="edge_cutoff" type="integer" value="10000" optional="true"
             label="Maximum distance a transcript edge can be from the peak edge before being omitted from the analysis"
             help="Set to zero to indicate that no cut off should be applied" />
      <param name="number" type="integer" value="4" optional="true"
             label="Maximum number of transcripts per peak to report from the analysis" />
      <!-- Both promoter limits must be set for the promoter region option
           to be added to the command line -->
      <param name="promoter_start" type="integer" value="-10000" optional="true"
             label="Start of promoter region with respect to gene TSS" />
      <param name="promoter_end" type="integer" value="2500" optional="true"
             label="End of promoter region with respect to gene TSS" />
      <param name="pad_output" type="boolean" checked="false" truevalue="yes"
             label="Output same number of lines for each peak"
             help="Add blank lines in output for peaks with fewer than maximum number of hits (--pad)" />
    </when>
  </conditional>
  <!-- When checked, the tab-delimited results go into one zip archive
       instead of separate outputs -->
  <param name="results_as_zip" type="boolean" checked="false" truevalue="yes"
         label="Put output tab-delimited files into a single zip archive" />
</inputs>
|
|
<outputs>
  <!-- The Excel spreadsheet has no filter, so it is produced on every run -->
  <data name="xls_output"
        format="xls"
        label="All RnaChipIntegrator analyses for ${rnaseq.name} vs ${chipseq.name} (Excel spreadsheet)" />

  <!-- Summit analyses: kept only when peak_type is "summits" and the
       results are not being zipped -->
  <data name="peaks_to_transcripts_out"
        format="tabular"
        label="Nearest summits to transcripts for ${rnaseq.name} vs ${chipseq.name}">
    <filter>analysis_options['peak_type'] == "summits"</filter>
    <filter>results_as_zip is False</filter>
  </data>
  <data name="tss_to_summits_out"
        format="tabular"
        label="Nearest TSS to summits for ${rnaseq.name} vs ${chipseq.name}">
    <filter>analysis_options['peak_type'] == "summits"</filter>
    <filter>results_as_zip is False</filter>
  </data>

  <!-- Region analyses: kept only when peak_type is "regions" and the
       results are not being zipped -->
  <data name="transcripts_to_edges_out"
        format="tabular"
        label="Nearest transcripts to peak edges for ${rnaseq.name} vs ${chipseq.name}">
    <filter>analysis_options['peak_type'] == "regions"</filter>
    <filter>results_as_zip is False</filter>
  </data>
  <data name="transcripts_to_edges_summary"
        format="tabular"
        label="Nearest transcripts to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}">
    <filter>analysis_options['peak_type'] == "regions"</filter>
    <filter>results_as_zip is False</filter>
  </data>
  <data name="tss_to_edges_out"
        format="tabular"
        label="Nearest TSS to peak edges for ${rnaseq.name} vs ${chipseq.name}">
    <filter>analysis_options['peak_type'] == "regions"</filter>
    <filter>results_as_zip is False</filter>
  </data>
  <data name="tss_to_edges_summary"
        format="tabular"
        label="Nearest TSS to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}">
    <filter>analysis_options['peak_type'] == "regions"</filter>
    <filter>results_as_zip is False</filter>
  </data>

  <!-- Single archive, kept only when the zip option is checked -->
  <data name="zip_file"
        format="zip"
        label="All tab-delimited files for ${rnaseq.name} vs ${chipseq.name} (zip file)">
    <filter>results_as_zip is True</filter>
  </data>
</outputs>
|
|
<tests>
  <!-- Summit data: expects the spreadsheet plus the two summit result files -->
  <test>
    <param name="rnaseq" ftype="tabular" value="ExpressionData.txt" />
    <param name="chipseq" ftype="tabular" value="ChIP_summits.txt" />
    <param name="peak_type" value="summits" />
    <param name="window" value="20000" />
    <param name="cutoff" value="130000" />
    <!--
      NB: list the outputs in the same order as they are declared in the
      tool (the order in which they are written to the history); the test
      framework seems to match on position and to ignore the "name"
      attribute.
    -->
    <output name="xls_output" file="summits.xls" compare="sim_size" />
    <output name="peaks_to_transcripts_out" ftype="tabular" file="peaks_to_transcripts.out" />
    <output name="tss_to_summits_out" ftype="tabular" file="tss_to_summits.out" />
  </test>
  <!-- Region data: expects the spreadsheet plus the four region result files -->
  <test>
    <param name="rnaseq" ftype="tabular" value="ExpressionData.txt" />
    <param name="chipseq" ftype="tabular" value="ChIP_peaks.txt" />
    <param name="peak_type" value="regions" />
    <param name="edge_cutoff" value="130000" />
    <!--
      NB: list the outputs in the same order as they are declared in the
      tool (the order in which they are written to the history); the test
      framework seems to match on position and to ignore the "name"
      attribute.
    -->
    <output name="xls_output" file="peaks.xls" compare="sim_size" />
    <output name="transcripts_to_edges_out" ftype="tabular" file="transcripts_to_edges.out" />
    <output name="transcripts_to_edges_summary" ftype="tabular" file="transcripts_to_edges.summary" />
    <output name="tss_to_edges_out" ftype="tabular" file="tss_to_edges.out" />
    <output name="tss_to_edges_summary" ftype="tabular" file="tss_to_edges.summary" />
  </test>
</tests>
|
|
161 <help>
|
|
162
|
|
163 .. class:: infomark
|
|
164
|
|
165 **What it does**
|
|
166
|
|
167 Run RnaChipIntegrator to perform integrated analyses of gene expression
|
|
168 and ChIP data, identifying the nearest ChIP peaks to each transcript
|
|
169 and vice versa.
|
|
170
|
|
171 For ChIP peaks defined as regions the following analyses are performed:
|
|
172
|
|
* **TranscriptsToPeakEdges**: reports the nearest transcripts with the smallest
  distance from either their TSS or TES to the nearest peak edge.
|
|
175
|
|
* **TSSToPeakEdges**: reports the nearest transcripts with the smallest distance
  from their TSS to the nearest peak edge.
|
|
178
|
|
179 For ChIP peaks defined as summits:
|
|
180
|
|
* **TSSToSummits**: reports the nearest transcripts with the smallest distance
  from the TSS to the nearest peak summit.
|
|
183
|
|
* **PeaksToTranscripts**: reports the nearest peak summits with the smallest
  distance to either the TSS or TES of each transcript.
|
|
186
|
|
187 The program was originally written specifically for ChIP-Seq and RNA-Seq data
|
|
188 but works equally well for ChIP-chip and microarray expression data, and can
|
|
189 also be used to integrate any set of genomic features (e.g. canonical genes,
|
|
190 CpG islands) with expression data.
|
|
191
|
|
192 RnaChipIntegrator can be obtained from
|
|
193 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
|
|
194
|
|
195 -------------
|
|
196
|
|
197 .. class:: infomark
|
|
198
|
|
199 **Input**
|
|
200
|
|
201 The expression data must be in a tab-delimited file with the following columns
|
|
202 of data for each genomic feature (one feature per line):
|
|
203
|
|
====== ========== ======================================================================
Column Name       Description
====== ========== ======================================================================
1      ID         Name used to identify the feature in the output
2      chr        Chromosome name
3      start      Start position of the feature
4      end        End position of the feature
5      strand     Must be either '+' or '-'
6      diff_expr  Optional: indicates feature is differentially expressed (1) or not (0)
====== ========== ======================================================================
|
|
214
|
|
215 The ChIP-seq data must be in a tab-delimited file with 3 columns of data for each
|
|
216 ChIP peak (one per line):
|
|
217
|
|
====== ========== ======================================================================
Column Name       Description
====== ========== ======================================================================
1      chr        Chromosome name (must match one of those in expression data file)
2      start      Start position of the peak
3      end        End position of the peak (start + 1 for summit data)
====== ========== ======================================================================
|
|
225
|
|
226 The ChIP peak data can be either the summit (in which case 'end' - 'start' = 1) or the
|
|
227 entire extent of the binding region (with 'start' and 'end' indicating the limits).
|
|
228
|
|
229 -------------
|
|
230
|
|
231 .. class:: infomark
|
|
232
|
|
233 **Output**
|
|
234
|
|
235 The outputs from this tool vary depending on the type of data that is input, however
|
|
236 generally there is one tab-delimited results file for each analysis described above
|
|
237 in the **What it does** section (some analyses output a second file with just the
|
|
238 "best" hits).
|
|
239
|
|
240 A history item will be generated for each output file, unless the option to put them
|
|
241 into a single zip archive is selected; this archive file will have to be downloaded
|
|
242 and unzipped on your local machine. It is recommended that you refer to the
|
|
243 RnaChipIntegrator documentation for information on the contents of each output file:
|
|
244 https://github.com/fls-bioinformatics-core/RnaChipIntegrator/blob/master/doc/MANUAL.markdown
|
|
245
|
|
246 In addition an Excel spreadsheet (with one page for each analysis performed) is always
|
|
247 produced.
|
|
248
|
|
249 -------------
|
|
250
|
|
251 .. class:: infomark
|
|
252
|
|
253 **Credits**
|
|
254
|
|
255 This Galaxy tool has been developed within the Bioinformatics Core Facility at the
|
|
256 University of Manchester. It runs the RnaChipIntegrator package which has also been
|
|
257 developed by this group, and is documented at
|
|
258 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
|
|
259
|
|
260 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
|
|
261 </help>
|
|
262 </tool>
|