0
|
1 <?xml version="1.0" encoding="utf-8"?>
|
1
|
2 <tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@.0">
|
|
3 <description>Integrated analysis of 'gene' and 'peak' data</description>
|
0
|
<!-- Shared definitions are imported from rnachipintegrator_macros.xml. The requirements, version_command, analysis_options, output_options and citations macros expanded in this file are presumably all defined there (confirm against the macros file). -->
4 <macros>
|
|
5 <import>rnachipintegrator_macros.xml</import>
|
|
6 </macros>
|
|
7 <expand macro="requirements" />
|
|
8 <expand macro="version_command" />
|
1
|
<!-- Command template: runs rnachipintegrator_wrapper.sh through bash. The peak_cols option is passed only when the peaks dataset metadata defines chromCol; a blank cutoff is replaced by 0; the number option is omitted when blank. In compact mode only the compact flag is added; otherwise summary files are requested when output.summary is set, and the value of output.pad_output is appended. The feature and peak datasets are the final positional arguments. -->
9 <command interpreter="bash"><![CDATA[
|
|
10 rnachipintegrator_wrapper.sh
|
|
11 #if $peaks_in.metadata.chromCol
|
|
12 --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol}
|
0
|
13 #end if
|
1
|
14 #if str( $cutoff ) != ""
|
|
15 --cutoff=$cutoff
|
|
16 #else
|
|
17 --cutoff=0
|
0
|
18 #end if
|
1
|
19 #if str( $number ) != ""
|
|
20 --number=$number
|
0
|
21 #end if
|
1
|
22 --promoter_region=$promoter_start,$promoter_end
|
|
23 --edge=$edge
|
|
24 $diff_expressed_only
|
|
25 --xlsx_file "$xlsx_out"
|
|
26 --output_files "$peaks_per_feature_out" "$features_per_peak_out"
|
|
27 #if $output.compact_format
|
|
28 --compact
|
|
29 #else
|
|
30 #if $output.summary
|
|
31 --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary"
|
|
32 #end if
|
|
33 ${output.pad_output}
|
|
34 #end if
|
|
35 "$features_in" "$peaks_in"
|
|
36 ]]></command>
|
0
|
<!-- Inputs: a tabular features dataset and a tabular peaks dataset, plus the analysis_options and output_options macro expansions. The diff_expressed_only boolean contributes its truevalue (the only-DE flag) to the command line when checked and an empty string otherwise. -->
37 <inputs>
|
1
|
38 <param format="tabular" name="features_in" type="data"
|
|
39 label="Genes/genomic features" />
|
|
40 <param format="tabular" name="peaks_in" type="data"
|
|
41 label="Peaks/regions" />
|
|
42 <expand macro="analysis_options" />
|
|
43 <param name="diff_expressed_only" type="boolean"
|
|
44 truevalue="--only-DE" falsevalue="" checked="false"
|
|
45 label="Only consider genes which are flagged as differentially
|
|
46 expressed"
|
|
47 help="NB input feature data must include differential expression
|
|
48 flags (--only-DE)" />
|
|
49 <expand macro="output_options" />
|
0
|
50 </inputs>
|
|
51 <outputs>
|
1
|
52 <!-- Always produce XLSX output; the two main result tables that follow are likewise declared without filters -->
|
|
53 <data format="xlsx" name="xlsx_out"
|
|
54 label="All RnaChipIntegrator analyses: ${features_in.name} vs ${peaks_in.name} (Excel spreadsheet)" />
|
|
55 <data format="tabular" name="peaks_per_feature_out"
|
|
56 label="Nearest peaks to each gene: ${features_in.name} vs ${peaks_in.name}" />
|
|
57 <data format="tabular" name="features_per_peak_out"
|
|
58 label="Nearest genes to each peak: ${features_in.name} vs ${peaks_in.name}" />
|
|
<!-- The two summary datasets carry filters: they are only produced when compact_format is False and summary is True -->
59 <data format="tabular" name="peaks_per_feature_summary"
|
|
60 label="Nearest peaks to each gene (summary): ${features_in.name} vs ${peaks_in.name}" >
|
|
61 <filter>output['compact_format'] is False</filter>
|
|
62 <filter>output['summary'] is True</filter>
|
0
|
63 </data>
|
1
|
64 <data format="tabular" name="features_per_peak_summary"
|
|
65 label="Nearest gene to each peak (summary): ${features_in.name} vs ${peaks_in.name}" >
|
|
66 <filter>output['compact_format'] is False</filter>
|
|
67 <filter>output['summary'] is True</filter>
|
0
|
68 </data>
|
|
69 </outputs>
|
|
70 <tests>
|
1
|
71 <!--
|
|
72 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt summits.txt
|
|
73 -->
|
0
|
<!-- Summit data (summits.txt) with an explicit cutoff and promoter region; all other options are left unset. The XLSX output is compared with sim_size and the two tabular outputs against their reference files. The '+' in the command line above presumably stands for the double-hyphen option prefix, which is not allowed inside an XML comment. -->
74 <test>
|
1
|
75 <param name="features_in" value="features.txt" ftype="tabular" />
|
|
76 <param name="peaks_in" value="summits.txt" ftype="tabular" />
|
0
|
77 <param name="cutoff" value="130000" />
|
1
|
78 <param name="promoter_start" value="-10000" />
|
|
79 <param name="promoter_end" value="2500" />
|
|
80 <output name="xlsx_out" file="summits.xlsx" compare="sim_size" />
|
|
81 <output name="peaks_per_feature_out" ftype="tabular"
|
|
82 file="summits_per_feature.out" />
|
|
83 <output name="features_per_peak_out" ftype="tabular"
|
|
84 file="features_per_summit.out" />
|
|
85 </test>
|
|
86 <!--
|
|
87 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt peaks.txt
|
|
88 -->
|
|
<!-- Peak-region data (peaks.txt) with an explicit cutoff and promoter region; all other options are left unset. The XLSX output is compared with sim_size. -->
89 <test>
|
|
90 <param name="features_in" value="features.txt" ftype="tabular" />
|
|
91 <param name="peaks_in" value="peaks.txt" ftype="tabular" />
|
|
92 <param name="cutoff" value="130000" />
|
|
93 <param name="promoter_start" value="-10000" />
|
|
94 <param name="promoter_end" value="2500" />
|
|
95 <output name="xlsx_out" file="peaks1.xlsx" compare="sim_size" />
|
|
96 <output name="peaks_per_feature_out" ftype="tabular"
|
|
97 file="peaks_per_feature1.out" />
|
|
98 <output name="features_per_peak_out" ftype="tabular"
|
|
99 file="features_per_peak1.out" />
|
0
|
100 </test>
|
1
|
101 <!--
|
|
102 RnaChipIntegrator +name=test +cutoff=130000 +xlsx features.txt peaks.txt
|
|
103 -->
|
0
|
<!-- compact_format is switched off here, so this test exercises the non-compact ("full") output; no promoter region is set. Only the XLSX file and the two main tabular outputs are checked. -->
104 <test>
|
1
|
105 <param name="features_in" value="features.txt" ftype="tabular" />
|
|
106 <param name="peaks_in" value="peaks.txt" ftype="tabular" />
|
|
107 <param name="cutoff" value="130000" />
|
|
108 <param name="compact_format" value="false" />
|
|
109 <output name="xlsx_out" file="peaks2.xlsx" compare="sim_size" />
|
|
110 <output name="peaks_per_feature_out" ftype="tabular"
|
|
111 file="peaks_per_feature2.out" />
|
|
112 <output name="features_per_peak_out" ftype="tabular"
|
|
113 file="features_per_peak2.out" />
|
|
114 </test>
|
|
115 <!--
|
|
116 RnaChipIntegrator +name=test +cutoff=130000 +only-DE +xlsx +compact features.txt peaks.txt
|
|
117 -->
|
|
<!-- Enables diff_expressed_only so that its truevalue (the only-DE flag) is added to the command line; compact_format and the promoter region are not set in this test. -->
118 <test>
|
|
119 <param name="features_in" value="features.txt" ftype="tabular" />
|
|
120 <param name="peaks_in" value="peaks.txt" ftype="tabular" />
|
|
121 <param name="cutoff" value="130000" />
|
|
122 <param name="diff_expressed_only" value="true" />
|
|
123 <output name="xlsx_out" file="peaks3.xlsx" compare="sim_size" />
|
|
124 <output name="peaks_per_feature_out" ftype="tabular"
|
|
125 file="peaks_per_feature3.out" />
|
|
126 <output name="features_per_peak_out" ftype="tabular"
|
|
127 file="features_per_peak3.out" />
|
|
128 </test>
|
|
129 <!--
|
|
130 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt
|
|
131 -->
|
|
<!-- Non-compact output with both summary and pad_output enabled; in addition to the XLSX file and the two main tables, the two summary datasets are checked. NOTE(review): the command line above lists a promoter_region and no padding option, whereas this test sets pad_output and no promoter_start/promoter_end; confirm which is intended. -->
132 <test>
|
|
133 <param name="features_in" value="features.txt" ftype="tabular" />
|
|
134 <param name="peaks_in" value="peaks.txt" ftype="tabular" />
|
|
135 <param name="cutoff" value="130000" />
|
|
136 <param name="compact_format" value="false" />
|
|
137 <param name="summary" value="true" />
|
|
138 <param name="pad_output" value="true" />
|
|
139 <output name="xlsx_out" file="peaks4.xlsx" compare="sim_size" />
|
|
140 <output name="peaks_per_feature_out" ftype="tabular"
|
|
141 file="peaks_per_feature4.out" />
|
|
142 <output name="features_per_peak_out" ftype="tabular"
|
|
143 file="features_per_peak4.out" />
|
|
144 <output name="peaks_per_feature_summary" ftype="tabular"
|
|
145 file="peaks_per_feature4.summary" />
|
|
146 <output name="features_per_peak_summary" ftype="tabular"
|
|
147 file="features_per_peak4.summary" />
|
0
|
148 </test>
|
|
149 </tests>
|
|
150 <help>
|
|
151
|
|
152 .. class:: infomark
|
|
153
|
|
154 **What it does**
|
|
155
|
1
|
156 Performs integrated analyses of genes (or other genomic feature data)
|
|
157 against a set of peaks (e.g. ChIP data), identifying the nearest peaks to
|
|
158 each feature and vice versa.
|
0
|
159
|
1
|
160 The program was originally written specifically for ChIP-Seq and RNA-Seq
|
|
161 data but works equally well for ChIP-chip and microarray expression data,
|
|
162 and can also be used to integrate any set of genomic features (e.g.
|
|
163 canonical genes, CpG islands) with expression data.
|
0
|
164
|
1
|
165 RnaChipIntegrator can be obtained from
|
|
166 https://pypi.python.org/pypi/RnaChipIntegrator/
|
0
|
167
|
|
168 -------------
|
|
169
|
|
170 .. class:: infomark
|
|
171
|
|
172 **Input**
|
|
173
|
1
|
174 The gene data must be in a tabular file with the following columns
|
|
175 of data for each gene or genomic feature (one gene per line):
|
0
|
176
|
|
177 ====== ========== ======================================================================
|
|
178 Column Name Description
|
|
179 ====== ========== ======================================================================
|
1
|
180 1 ID Name used to identify the gene in the output
|
0
|
181 2 chr Chromosome name
|
1
|
182 3 start Start position of the gene
|
|
183 4 end End position of the gene
|
0
|
184 5 strand Must be either '+' or '-'
|
1
|
185 6 diff_expr Optional: indicates gene is differentially expressed (1) or not (0)
|
0
|
186 ====== ========== ======================================================================
|
|
187
|
1
|
188 The peak data must be in a tabular file with at least 3 columns of data
|
|
189 for each peak (one peak per line):
|
0
|
190
|
1
|
191 ====== ========== =================================
|
0
|
192 Column Name Description
|
1
|
193 ====== ========== =================================
|
|
194 1 chr Chromosome name
|
0
|
195 2 start Start position of the peak
|
1
|
196 3 end End position of the peak
|
|
197 ====== ========== =================================
|
0
|
198
|
1
|
199 If peak data is in ``bed`` format then the tool will automatically
|
|
200 assign the correct columns, otherwise the first three columns of data
|
|
201 will be used.
|
0
|
202
|
|
203 -------------
|
|
204
|
|
205 .. class:: infomark
|
|
206
|
1
|
207 **Outputs**
|
|
208
|
|
209 The key outputs from the tool are two lists comprising the nearest
|
|
210 peaks for each gene, and the nearest gene for each peak (one dataset
|
|
211 for each list).
|
|
212
|
|
213 There are two formats for reporting: "compact" and "full":
|
0
|
214
|
1
|
215 * **Compact output** reports all the hits for each peak or gene on
|
|
216 a single line of output;
|
|
217 * **Full output** reports each peak/gene pair on a separate line
|
|
218 (i.e. a multi-line output format).
|
|
219
|
|
220 In "full" output mode, additional options are available:
|
|
221
|
|
222 * The output files can be "padded" with extra (empty) lines to ensure
|
|
223 that there are always the same number of lines for each peak or
|
|
224 gene, if fewer than the requested number of hits are found.
|
|
225 * "Summary" datasets can also be requested, which include just the
|
|
226 nearest peak reported for each gene (and vice versa).
|
0
|
227
|
1
|
228 In either mode these data will also be output in a single MS Excel file,
|
|
229 which contains one sheet per result set.
|
|
230
|
|
231 .. class:: warning
|
|
232
|
|
233 Using "compact" output with the number of hits limited to more than 4
|
|
234 peak/gene pairs (or with no limit at all) can result in a large number
|
|
235 of columns in the output files, which in some versions of Galaxy will
|
|
236 not be properly displayed. However the data files themselves should be
|
|
237 okay.
|
0
|
238
|
1
|
239 -------------
|
|
240
|
|
241 .. class:: infomark
|
|
242
|
|
243 **More information**
|
|
244
|
|
245 It is recommended that you refer to the ``RnaChipIntegrator``
|
|
246 documentation for information on the contents of each output file:
|
|
247
|
|
248 * http://rnachipintegrator.readthedocs.org/en/latest/
|
0
|
249
|
|
250 -------------
|
|
251
|
|
252 .. class:: infomark
|
|
253
|
|
254 **Credits**
|
|
255
|
|
256 This Galaxy tool has been developed within the Bioinformatics Core Facility at the
|
|
257 University of Manchester. It runs the RnaChipIntegrator package which has also been
|
|
258 developed by this group, and is documented at
|
|
259 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
|
|
260
|
|
261 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
|
|
262 </help>
|
1
|
263 <expand macro="citations" />
|
0
|
264 </tool>
|