comparison rnachipintegrator_wrapper.xml @ 0:d9c1f2133124 draft

Uploaded initial version 0.4.4.
author pjbriggs
date Tue, 30 Jun 2015 06:44:06 -0400
parents
children 5f69a2c1b9c9
comparison
equal deleted inserted replaced
-1:000000000000 0:d9c1f2133124
1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@-0">
3 <description>Integrated analysis of gene expression data and ChIP data</description>
4 <macros>
5 <import>rnachipintegrator_macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
8 <expand macro="version_command" />
9 <!-- Assemble the wrapper script call: options for the selected peak type, then the expression and ChIP input datasets, then the output targets (always the Excel file, plus either a zip archive or the individual tabular files for the selected peak type) --> <command interpreter="bash">rnachipintegrator_wrapper.sh
10 #if str( $analysis_options.peak_type ) == "summits"
11 #if str( $analysis_options.window ) != ""
12 --window=$analysis_options.window
13 #end if
14 #if str( $analysis_options.cutoff ) != ""
15 --cutoff=$analysis_options.cutoff
16 #end if
17 #end if
18 #if str( $analysis_options.peak_type ) == "regions"
19 #if str( $analysis_options.edge_cutoff ) != ""
20 --edge-cutoff=$analysis_options.edge_cutoff
21 #end if
22 #if str( $analysis_options.number ) != ""
23 --number=$analysis_options.number
24 #end if
25 #if (str( $analysis_options.promoter_start ) != "" and str( $analysis_options.promoter_end ) != "")
26 --promoter_region=$analysis_options.promoter_start,$analysis_options.promoter_end
27 #end if
28 #if $analysis_options.pad_output
29 --pad
30 #end if
31 #end if
32 $rnaseq $chipseq
33 --output_xls $xls_output
34 #if $results_as_zip
35 --zip_file $zip_file
36 #else
37 #if str( $analysis_options.peak_type ) == "summits"
38 --summit_outputs $peaks_to_transcripts_out $tss_to_summits_out
39 #end if
40 #if str( $analysis_options.peak_type ) == "regions"
41 --peak_outputs $transcripts_to_edges_out
42 $transcripts_to_edges_summary
43 $tss_to_edges_out
44 $tss_to_edges_summary
45 #end if
46 #end if
47 </command>
48 <inputs> <!-- Two tabular datasets, analysis options that depend on the peak type, and a switch to bundle the tab-delimited outputs into one zip archive -->
49 <param format="tabular" name="rnaseq" type="data" label="Gene expression data file" />
50 <param format="tabular" name="chipseq" type="data" label="ChIP peaks data file" />
51 <conditional name="analysis_options">
52 <!-- user must specify if ChIP peaks are summits or regions -->
53 <param name="peak_type" type="select" label="ChIP peaks are"
54 help="Options and outputs depend on whether ChIP data are summits or regions">
55 <option value="summits">summits</option>
56 <option value="regions">regions</option>
57 </param>
58 <when value="summits">
59 <param name="window" type="integer" value="20000" optional="true"
60 label="Maximum distance a peak can be from each transcript
61 TSS before being omitted from analysis" />
62 <param name="cutoff" type="integer" value="130000" optional="true"
63 label="Maximum distance a transcript TSS can be from each
64 peak before being omitted from the analysis" />
65 </when>
66 <when value="regions">
67 <param name="edge_cutoff" type="integer" value="10000" optional="true"
68 label="Maximum distance a transcript edge can be from the
69 peak edge before being omitted from the analysis"
70 help="Set to zero to indicate that no cut off should be applied" />
71 <param name="number" type="integer" value="4" optional="true"
72 label="Maximum number of transcripts per peak to report
73 from the analysis" />
74 <param name="promoter_start" type="integer" value="-10000" optional="true"
75 label="Start of promoter region with respect to gene TSS" />
76 <param name="promoter_end" type="integer" value="2500" optional="true"
77 label="End of promoter region with respect to gene TSS" />
78 <param name="pad_output" type="boolean" checked="false" truevalue="yes"
79 label="Output same number of lines for each peak"
80 help="Add blank lines in output for peaks with fewer than maximum number
81 of hits (--pad)" />
82 </when>
83 </conditional>
84 <param name="results_as_zip" type="boolean" checked="false" truevalue="yes"
85 label="Put output tab-delimited files into a single zip archive" />
86 </inputs>
87 <outputs>
88 <!-- The Excel spreadsheet is generated on every run -->
89 <data name="xls_output" format="xls"
90 label="All RnaChipIntegrator analyses for ${rnaseq.name} vs ${chipseq.name} (Excel spreadsheet)" />
91 <!-- Summit-only outputs: individual files, suppressed when results are zipped -->
92 <data name="peaks_to_transcripts_out" format="tabular"
93 label="Nearest summits to transcripts for ${rnaseq.name} vs ${chipseq.name}" >
94 <filter>results_as_zip is False</filter>
95 <filter>analysis_options['peak_type'] == "summits"</filter>
96 </data>
97 <data name="tss_to_summits_out" format="tabular"
98 label="Nearest TSS to summits for ${rnaseq.name} vs ${chipseq.name}" >
99 <filter>results_as_zip is False</filter>
100 <filter>analysis_options['peak_type'] == "summits"</filter>
101 </data>
102 <!-- Region-only outputs: individual files, suppressed when results are zipped -->
103 <data name="transcripts_to_edges_out" format="tabular"
104 label="Nearest transcripts to peak edges for ${rnaseq.name} vs ${chipseq.name}" >
105 <filter>results_as_zip is False</filter>
106 <filter>analysis_options['peak_type'] == "regions"</filter>
107 </data>
108 <data name="transcripts_to_edges_summary" format="tabular"
109 label="Nearest transcripts to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}" >
110 <filter>results_as_zip is False</filter>
111 <filter>analysis_options['peak_type'] == "regions"</filter>
112 </data>
113 <data name="tss_to_edges_out" format="tabular"
114 label="Nearest TSS to peak edges for ${rnaseq.name} vs ${chipseq.name}" >
115 <filter>results_as_zip is False</filter>
116 <filter>analysis_options['peak_type'] == "regions"</filter>
117 </data>
118 <data name="tss_to_edges_summary" format="tabular"
119 label="Nearest TSS to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}" >
120 <filter>results_as_zip is False</filter>
121 <filter>analysis_options['peak_type'] == "regions"</filter>
122 </data>
123 <data name="zip_file" format="zip"
124 label="All tab-delimited files for ${rnaseq.name} vs ${chipseq.name} (zip file)" >
125 <filter>results_as_zip is True</filter>
126 </data>
127 </outputs>
128 <tests>
129 <test> <!-- Summit-type peaks, using the same window and cutoff values as the input defaults -->
130 <param name="rnaseq" value="ExpressionData.txt" ftype="tabular" />
131 <param name="chipseq" value="ChIP_summits.txt" ftype="tabular" />
132 <param name="peak_type" value="summits" />
133 <param name="window" value="20000" />
134 <param name="cutoff" value="130000" />
135 <!--
136 NB: list the expected outputs in the same order as they are declared in
137 the tool (i.e. the order in which they are written to the history); the
138 test framework appears to match outputs by position and ignore "name"
139 -->
140 <output name="xls_output" file="summits.xls" compare="sim_size" />
141 <output name="peaks_to_transcripts_out" file="peaks_to_transcripts.out" ftype="tabular" />
142 <output name="tss_to_summits_out" file="tss_to_summits.out" ftype="tabular" />
143 </test>
144 <test> <!-- Region-type peaks, with an edge cutoff that differs from the input default -->
145 <param name="rnaseq" value="ExpressionData.txt" ftype="tabular" />
146 <param name="chipseq" value="ChIP_peaks.txt" ftype="tabular" />
147 <param name="peak_type" value="regions" />
148 <param name="edge_cutoff" value="130000" />
149 <!--
150 NB: list the expected outputs in the same order as they are declared in
151 the tool (i.e. the order in which they are written to the history); the
152 test framework appears to match outputs by position and ignore "name"
153 -->
154 <output name="xls_output" file="peaks.xls" compare="sim_size" />
155 <output name="transcripts_to_edges_out" file="transcripts_to_edges.out" ftype="tabular" />
156 <output name="transcripts_to_edges_summary" file="transcripts_to_edges.summary" ftype="tabular" />
157 <output name="tss_to_edges_out" file="tss_to_edges.out" ftype="tabular" />
158 <output name="tss_to_edges_summary" file="tss_to_edges.summary" ftype="tabular" />
159 </test>
160 </tests>
161 <help>
162
163 .. class:: infomark
164
165 **What it does**
166
167 Run RnaChipIntegrator to perform integrated analyses of gene expression
168 and ChIP data, identifying the nearest ChIP peaks to each transcript
169 and vice versa.
170
171 For ChIP peaks defined as regions the following analyses are performed:
172
173 * **TranscriptsToPeakEdges**: reports the nearest transcripts with the smallest
174 distance from either their TSS or TES to the nearest peak edge.
175
176 * **TSSToPeakEdges**: reports the nearest transcripts with the smallest distance
177 from their TSS to the nearest peak edge.
178
179 For ChIP peaks defined as summits:
180
181 * **TSSToSummits**: reports the nearest transcripts with the smallest distance
182 from the TSS to the nearest peak summit.
183
184 * **PeaksToTranscripts**: reports the nearest peak summits with the smallest
185 distance to either the TSS or TES of each transcript.
186
187 The program was originally written specifically for ChIP-Seq and RNA-Seq data
188 but works equally well for ChIP-chip and microarray expression data, and can
189 also be used to integrate any set of genomic features (e.g. canonical genes,
190 CpG islands) with expression data.
191
192 RnaChipIntegrator can be obtained from
193 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
194
195 -------------
196
197 .. class:: infomark
198
199 **Input**
200
201 The expression data must be in a tab-delimited file with the following columns
202 of data for each genomic feature (one feature per line):
203
204 ====== ========== ======================================================================
205 Column Name       Description
206 ====== ========== ======================================================================
207 1      ID         Name used to identify the feature in the output
208 2      chr        Chromosome name
209 3      start      Start position of the feature
210 4      end        End position of the feature
211 5      strand     Must be either '+' or '-'
212 6      diff_expr  Optional: indicates feature is differentially expressed (1) or not (0)
213 ====== ========== ======================================================================
214
215 The ChIP-seq data must be in a tab-delimited file with 3 columns of data for each
216 ChIP peak (one per line):
217
218 ====== ========== ======================================================================
219 Column Name       Description
220 ====== ========== ======================================================================
221 1      chr        Chromosome name (must match one of those in expression data file)
222 2      start      Start position of the peak
223 3      end        End position of the peak (start + 1 for summit data)
224 ====== ========== ======================================================================
225
226 The ChIP peak data can be either the summit (in which case 'end' - 'start' = 1) or the
227 entire extent of the binding region (with 'start' and 'end' indicating the limits).
228
229 -------------
230
231 .. class:: infomark
232
233 **Output**
234
235 The outputs from this tool vary depending on the type of input data; however,
236 there is generally one tab-delimited results file for each analysis described above
237 in the **What it does** section (some analyses output a second file with just the
238 "best" hits).
239
240 A history item will be generated for each output file, unless the option to put them
241 into a single zip archive is selected; this archive file will have to be downloaded
242 and unzipped on your local machine. It is recommended that you refer to the
243 RnaChipIntegrator documentation for information on the contents of each output file:
244 https://github.com/fls-bioinformatics-core/RnaChipIntegrator/blob/master/doc/MANUAL.markdown
245
246 In addition an Excel spreadsheet (with one page for each analysis performed) is always
247 produced.
248
249 -------------
250
251 .. class:: infomark
252
253 **Credits**
254
255 This Galaxy tool has been developed within the Bioinformatics Core Facility at the
256 University of Manchester. It runs the RnaChipIntegrator package which has also been
257 developed by this group, and is documented at
258 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
259
260 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
261 </help>
262 </tool>