Mercurial > repos > pjbriggs > rnachipintegrator
comparison rnachipintegrator_wrapper.xml @ 1:5f69a2c1b9c9 draft
Uploaded version 1.0.0.0.
author | pjbriggs |
---|---|
date | Wed, 24 Feb 2016 09:39:14 -0500 |
parents | d9c1f2133124 |
children | dc498b03ca9a |
comparison
equal
deleted
inserted
replaced
0:d9c1f2133124 | 1:5f69a2c1b9c9 |
---|---|
1 <?xml version="1.0" encoding="utf-8"?> | 1 <?xml version="1.0" encoding="utf-8"?> |
2 <tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@-0"> | 2 <tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@.0"> |
3 <description>Integrated analysis of gene expression data and ChIP data</description> | 3 <description>Integrated analysis of 'gene' and 'peak' data</description> |
4 <macros> | 4 <macros> |
5 <import>rnachipintegrator_macros.xml</import> | 5 <import>rnachipintegrator_macros.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements" /> | 7 <expand macro="requirements" /> |
8 <expand macro="version_command" /> | 8 <expand macro="version_command" /> |
9 <command interpreter="bash">rnachipintegrator_wrapper.sh | 9 <command interpreter="bash"><![CDATA[ |
10 #if str( $analysis_options.peak_type ) == "summits" | 10 rnachipintegrator_wrapper.sh |
11 #if str( $analysis_options.window ) != "" | 11 #if $peaks_in.metadata.chromCol |
12 --window=$analysis_options.window | 12 --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol} |
13 #end if | |
14 #if str( $cutoff ) != "" | |
15 --cutoff=$cutoff | |
16 #else | |
17 --cutoff=0 | |
18 #end if | |
19 #if str( $number ) != "" | |
20 --number=$number | |
21 #end if | |
22 --promoter_region=$promoter_start,$promoter_end | |
23 --edge=$edge | |
24 $diff_expressed_only | |
25 --xlsx_file "$xlsx_out" | |
26 --output_files "$peaks_per_feature_out" "$features_per_peak_out" | |
27 #if $output.compact_format | |
28 --compact | |
29 #else | |
30 #if $output.summary | |
31 --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary" | |
13 #end if | 32 #end if |
14 #if str( $analysis_options.cutoff ) != "" | 33 ${output.pad_output} |
15 --cutoff=$analysis_options.cutoff | 34 #end if |
16 #end if | 35 "$features_in" "$peaks_in" |
17 #end if | 36 ]]></command> |
18 #if str( $analysis_options.peak_type ) == "regions" | |
19 #if str( $analysis_options.edge_cutoff ) != "" | |
20 --edge-cutoff=$analysis_options.edge_cutoff | |
21 #end if | |
22 #if str( $analysis_options.number ) != "" | |
23 --number=$analysis_options.number | |
24 #end if | |
25 #if (str( $analysis_options.promoter_start ) != "" and str( $analysis_options.promoter_end )) | |
26 --promoter_region=$analysis_options.promoter_start,$analysis_options.promoter_end | |
27 #end if | |
28 #if $analysis_options.pad_output | |
29 --pad | |
30 #end if | |
31 #end if | |
32 $rnaseq $chipseq | |
33 --output_xls $xls_output | |
34 #if $results_as_zip | |
35 --zip_file $zip_file | |
36 #else | |
37 #if str( $analysis_options.peak_type ) == "summits" | |
38 --summit_outputs $peaks_to_transcripts_out $tss_to_summits_out | |
39 #end if | |
40 #if str( $analysis_options.peak_type ) == "regions" | |
41 --peak_outputs $transcripts_to_edges_out | |
42 $transcripts_to_edges_summary | |
43 $tss_to_edges_out | |
44 $tss_to_edges_summary | |
45 #end if | |
46 #end if | |
47 </command> | |
48 <inputs> | 37 <inputs> |
49 <param format="tabular" name="rnaseq" type="data" label="Gene expression data file" /> | 38 <param format="tabular" name="features_in" type="data" |
50 <param format="tabular" name="chipseq" type="data" label="ChIP peaks data file" /> | 39 label="Genes/genomic features" /> |
51 <conditional name="analysis_options"> | 40 <param format="tabular" name="peaks_in" type="data" |
52 <!-- user must specify if ChIP peaks are summits or regions --> | 41 label="Peaks/regions" /> |
53 <param name="peak_type" type="select" label="ChIP peaks are" | 42 <expand macro="analysis_options" /> |
54 help="Options and outputs depend on whether ChIP data are summits or regions"> | 43 <param name="diff_expressed_only" type="boolean" |
55 <option value="summits">summits</option> | 44 truevalue="--only-DE" falsevalue="" checked="false" |
56 <option value="regions">regions</option> | 45 label="Only consider genes which are flagged as differentially |
57 </param> | 46 expressed" |
58 <when value="summits"> | 47 help="NB input feature data must include differential expression |
59 <param name="window" type="integer" value="20000" optional="true" | 48 flags (--only-DE)" /> |
60 label="Maximum distance a peak can be from each transcript | 49 <expand macro="output_options" /> |
61 TSS before being omitted from analysis" /> | |
62 <param name="cutoff" type="integer" value="130000" optional="true" | |
63 label="Maximum distance a transcript TSS can be from each | |
64 peak before being omitted from the analysis" /> | |
65 </when> | |
66 <when value="regions"> | |
67 <param name="edge_cutoff" type="integer" value="10000" optional="true" | |
68 label="Maximum distance a transcript edge can be from the | |
69 peak edge before being omitted from the analysis" | |
70 help="Set to zero to indicate that no cut off should be applied" /> | |
71 <param name="number" type="integer" value="4" optional="true" | |
72 label="Maximum number of transcripts per peak to report from | |
73 the analysis" /> | |
74 <param name="promoter_start" type="integer" value="-10000" optional="true" | |
75 label="Start of promoter region with respect to gene TSS" /> | |
76 <param name="promoter_end" type="integer" value="2500" optional="true" | |
77 label="End of promoter region with respect to gene TSS" /> | |
78 <param name="pad_output" type="boolean" checked="false" truevalue="yes" | |
79 label="Output same number of lines for each peak" | |
80 help="Add blank lines in output for peaks with fewer than maximum number | |
81 of hits (--pad)" /> | |
82 </when> | |
83 </conditional> | |
84 <param name="results_as_zip" type="boolean" checked="false" truevalue="yes" | |
85 label="Put output tab-delimited files into a single zip archive" /> | |
86 </inputs> | 50 </inputs> |
87 <outputs> | 51 <outputs> |
88 <!-- Always produce XLS output --> | 52 <!-- Always produce XLSX output --> |
89 <data format="xls" name="xls_output" | 53 <data format="xlsx" name="xlsx_out" |
90 label="All RnaChipIntegrator analyses for ${rnaseq.name} vs ${chipseq.name} (Excel spreadsheet)" /> | 54 label="All RnaChipIntegrator analyses: ${features_in.name} vs ${peaks_in.name} (Excel spreadsheet)" /> |
91 <!-- Outputs only produced for summit data --> | 55 <data format="tabular" name="peaks_per_feature_out" |
92 <data format="tabular" name="peaks_to_transcripts_out" | 56 label="Nearest peaks to each gene: ${features_in.name} vs ${peaks_in.name}" /> |
93 label="Nearest summits to transcripts for ${rnaseq.name} vs ${chipseq.name}" > | 57 <data format="tabular" name="features_per_peak_out" |
94 <filter>analysis_options['peak_type'] == "summits"</filter> | 58 label="Nearest genes to each peak: ${features_in.name} vs ${peaks_in.name}" /> |
95 <filter>results_as_zip is False</filter> | 59 <data format="tabular" name="peaks_per_feature_summary" |
60 label="Nearest peaks to each gene (summary): ${features_in.name} vs ${peaks_in.name}" > | |
61 <filter>output['compact_format'] is False</filter> | |
62 <filter>output['summary'] is True</filter> | |
96 </data> | 63 </data> |
97 <data format="tabular" name="tss_to_summits_out" | 64 <data format="tabular" name="features_per_peak_summary" |
98 label="Nearest TSS to summits for ${rnaseq.name} vs ${chipseq.name}" > | 65 label="Nearest gene to each peak (summary): ${features_in.name} vs ${peaks_in.name}" > |
99 <filter>analysis_options['peak_type'] == "summits"</filter> | 66 <filter>output['compact_format'] is False</filter> |
100 <filter>results_as_zip is False</filter> | 67 <filter>output['summary'] is True</filter> |
101 </data> | |
102 <!-- Outputs only produced for peak data --> | |
103 <data format="tabular" name="transcripts_to_edges_out" | |
104 label="Nearest transcripts to peak edges for ${rnaseq.name} vs ${chipseq.name}" > | |
105 <filter>analysis_options['peak_type'] == "regions"</filter> | |
106 <filter>results_as_zip is False</filter> | |
107 </data> | |
108 <data format="tabular" name="transcripts_to_edges_summary" | |
109 label="Nearest transcripts to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}" > | |
110 <filter>analysis_options['peak_type'] == "regions"</filter> | |
111 <filter>results_as_zip is False</filter> | |
112 </data> | |
113 <data format="tabular" name="tss_to_edges_out" | |
114 label="Nearest TSS to peak edges for ${rnaseq.name} vs ${chipseq.name}" > | |
115 <filter>analysis_options['peak_type'] == "regions"</filter> | |
116 <filter>results_as_zip is False</filter> | |
117 </data> | |
118 <data format="tabular" name="tss_to_edges_summary" | |
119 label="Nearest TSS to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}" > | |
120 <filter>analysis_options['peak_type'] == "regions"</filter> | |
121 <filter>results_as_zip is False</filter> | |
122 </data> | |
123 <data format="zip" name="zip_file" | |
124 label="All tab-delimited files for ${rnaseq.name} vs ${chipseq.name} (zip file)" > | |
125 <filter>results_as_zip is True</filter> | |
126 </data> | 68 </data> |
127 </outputs> | 69 </outputs> |
128 <tests> | 70 <tests> |
129 <test> | 71 <!-- |
130 <param name="rnaseq" value="ExpressionData.txt" ftype="tabular" /> | 72 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt summits.txt |
131 <param name="chipseq" value="ChIP_summits.txt" ftype="tabular" /> | 73 --> |
132 <param name="peak_type" value="summits" /> | 74 <test> |
133 <param name="window" value="20000" /> | 75 <param name="features_in" value="features.txt" ftype="tabular" /> |
134 <param name="cutoff" value="130000" /> | 76 <param name="peaks_in" value="summits.txt" ftype="tabular" /> |
135 <!-- | 77 <param name="cutoff" value="130000" /> |
136 **NB** outputs have to be specified in order that they appear in the | 78 <param name="promoter_start" value="-10000" /> |
137 tool (which is the order they will be written to the history) - the | 79 <param name="promoter_end" value="2500" /> |
138 test framework seems to use the order and ignores the "name" attribute | 80 <output name="xlsx_out" file="summits.xlsx" compare="sim_size" /> |
139 --> | 81 <output name="peaks_per_feature_out" ftype="tabular" |
140 <output name="xls_output" file="summits.xls" compare="sim_size" /> | 82 file="summits_per_feature.out" /> |
141 <output name="peaks_to_transcripts_out" file="peaks_to_transcripts.out" ftype="tabular" /> | 83 <output name="features_per_peak_out" ftype="tabular" |
142 <output name="tss_to_summits_out" file="tss_to_summits.out" ftype="tabular" /> | 84 file="features_per_summit.out" /> |
143 </test> | 85 </test> |
144 <test> | 86 <!-- |
145 <param name="rnaseq" value="ExpressionData.txt" ftype="tabular" /> | 87 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt peaks.txt |
146 <param name="chipseq" value="ChIP_peaks.txt" ftype="tabular" /> | 88 --> |
147 <param name="peak_type" value="regions" /> | 89 <test> |
148 <param name="edge_cutoff" value="130000" /> | 90 <param name="features_in" value="features.txt" ftype="tabular" /> |
149 <!-- | 91 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> |
150 **NB** outputs have to be specified in order that they appear in the | 92 <param name="cutoff" value="130000" /> |
151 tool (which is the order they will be written to the history) - the | 93 <param name="promoter_start" value="-10000" /> |
152 test framework seems to use the order and ignores the "name" attribute | 94 <param name="promoter_end" value="2500" /> |
153 --> | 95 <output name="xlsx_out" file="peaks1.xlsx" compare="sim_size" /> |
154 <output name="xls_output" file="peaks.xls" compare="sim_size" /> | 96 <output name="peaks_per_feature_out" ftype="tabular" |
155 <output name="transcripts_to_edges_out" file="transcripts_to_edges.out" ftype="tabular" /> | 97 file="peaks_per_feature1.out" /> |
156 <output name="transcripts_to_edges_summary" file="transcripts_to_edges.summary" ftype="tabular" /> | 98 <output name="features_per_peak_out" ftype="tabular" |
157 <output name="tss_to_edges_out" file="tss_to_edges.out" ftype="tabular" /> | 99 file="features_per_peak1.out" /> |
158 <output name="tss_to_edges_summary" file="tss_to_edges.summary" ftype="tabular" /> | 100 </test> |
101 <!-- | |
102 RnaChipIntegrator +name=test +cutoff=130000 +xlsx features.txt peaks.txt | |
103 --> | |
104 <test> | |
105 <param name="features_in" value="features.txt" ftype="tabular" /> | |
106 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
107 <param name="cutoff" value="130000" /> | |
108 <param name="compact_format" value="false" /> | |
109 <output name="xlsx_out" file="peaks2.xlsx" compare="sim_size" /> | |
110 <output name="peaks_per_feature_out" ftype="tabular" | |
111 file="peaks_per_feature2.out" /> | |
112 <output name="features_per_peak_out" ftype="tabular" | |
113 file="features_per_peak2.out" /> | |
114 </test> | |
115 <!-- | |
116 RnaChipIntegrator +name=test +cutoff=130000 +only-DE +xlsx +compact features.txt peaks.txt | |
117 --> | |
118 <test> | |
119 <param name="features_in" value="features.txt" ftype="tabular" /> | |
120 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
121 <param name="cutoff" value="130000" /> | |
122 <param name="diff_expressed_only" value="true" /> | |
123 <output name="xlsx_out" file="peaks3.xlsx" compare="sim_size" /> | |
124 <output name="peaks_per_feature_out" ftype="tabular" | |
125 file="peaks_per_feature3.out" /> | |
126 <output name="features_per_peak_out" ftype="tabular" | |
127 file="features_per_peak3.out" /> | |
128 </test> | |
129 <!-- | |
130 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt | |
131 --> | |
132 <test> | |
133 <param name="features_in" value="features.txt" ftype="tabular" /> | |
134 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
135 <param name="cutoff" value="130000" /> | |
136 <param name="compact_format" value="false" /> | |
137 <param name="summary" value="true" /> | |
138 <param name="pad_output" value="true" /> | |
139 <output name="xlsx_out" file="peaks4.xlsx" compare="sim_size" /> | |
140 <output name="peaks_per_feature_out" ftype="tabular" | |
141 file="peaks_per_feature4.out" /> | |
142 <output name="features_per_peak_out" ftype="tabular" | |
143 file="features_per_peak4.out" /> | |
144 <output name="peaks_per_feature_summary" ftype="tabular" | |
145 file="peaks_per_feature4.summary" /> | |
146 <output name="features_per_peak_summary" ftype="tabular" | |
147 file="features_per_peak4.summary" /> | |
159 </test> | 148 </test> |
160 </tests> | 149 </tests> |
161 <help> | 150 <help> |
162 | 151 |
163 .. class:: infomark | 152 .. class:: infomark |
164 | 153 |
165 **What it does** | 154 **What it does** |
166 | 155 |
167 Run RnaChipIntegrator to perform integrated analyses of gene expression | 156 Performs integrated analyses of genes (or other genomic feature data) |
168 and ChIP data, identifying the nearest ChIP peaks to each transcript | 157 against a set of peaks (e.g. ChIP data), identifying the nearest peaks to |
169 and vice versa. | 158 each feature and vice versa. |
170 | 159 |
171 For ChIP peaks defined as regions the following analyses are performed: | 160 The program was originally written specifically for ChIP-Seq and RNA-Seq |
172 | 161 data but works equally well for ChIP-chip and microarray expression data, |
173 * **TranscriptsToPeakEdges**: reports the nearest transcripts with the smallest | 162 and can also be used to integrate any set of genomic features (e.g. |
174 distance from either their TSS or TES to the nearest peak edge. | 163 canonical genes, CpG islands) with expression data. |
175 | 164 |
176 * **TSSToPeakEdges**: reports the nearest transcripts with the smallest distance | 165 RnaChipIntegrator can be obtained from |
177 from their TSS to the nearest peak edge. | 166 https://pypi.python.org/pypi/RnaChipIntegrator/ |
178 | |
179 For ChIP peaks defined as summits: | |
180 | |
181 * **TSSToSummits**: reports the nearest transcripts with the smallest distance | |
182 from the TSS to the nearest peak summit. | |
183 | |
184 * **PeaksToTranscripts**: reports the nearest peak summits with the smallest | |
185 distance to either the TSS or TES of each transcript. | |
186 | |
187 The program was originally written specifically for ChIP-Seq and RNA-Seq data | |
188 but works equally well for ChIP-chip and microarray expression data, and can | |
189 also be used to integrate any set of genomic features (e.g. canonical genes, | |
190 CpG islands) with expression data. | |
191 | |
192 RnaChipIntegrator can be obtained from | |
193 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/ | |
194 | 167 |
195 ------------- | 168 ------------- |
196 | 169 |
197 .. class:: infomark | 170 .. class:: infomark |
198 | 171 |
199 **Input** | 172 **Input** |
200 | 173 |
201 The expression data must be in a tab-delimited file with the following columns | 174 The gene data must be in a tabular file with the following columns |
202 of data for each genomic feature (one feature per line): | 175 of data for each gene or genomic feature (one gene per line): |
203 | 176 |
204 ====== ========== ====================================================================== | 177 ====== ========== ====================================================================== |
205 Column Name Description | 178 Column Name Description |
206 ====== ========== ====================================================================== | 179 ====== ========== ====================================================================== |
207 1 ID Name used to identify the feature in the output | 180 1 ID Name used to identify the gene in the output |
208 2 chr Chromosome name | 181 2 chr Chromosome name |
209 3 start Start position of the feature | 182 3 start Start position of the gene |
210 4 end End position of the feature | 183 4 end End position of the gene |
211 5 strand Must be either '+' or '-' | 184 5 strand Must be either '+' or '-' |
212 6 diff_expr Optional: indicates feature is differentially expressed (1) or not (0) | 185 6 diff_expr Optional: indicates gene is differentially expressed (1) or not (0) |
213 ====== ========== ====================================================================== | 186 ====== ========== ====================================================================== |
214 | 187 |
215 The ChIP-seq data must be in a tab-delimited file with 3 columns of data for each | 188 The peak data must be in a tabular file with at least 3 columns of data |
216 ChIP peak (one per line): | 189 for each peak (one peak per line): |
217 | 190 |
218 ====== ========== ====================================================================== | 191 ====== ========== ================================= |
219 Column Name Description | 192 Column Name Description |
220 ====== ========== ====================================================================== | 193 ====== ========== ================================= |
221 1 chr Chromosome name (must match one of those in expression data file) | 194 1 chr Chromosome name |
222 2 start Start position of the peak | 195 2 start Start position of the peak |
223 3 end End position of the peak (start + 1 for summit data) | 196 3 end End position of the peak |
224 ====== ========== ====================================================================== | 197 ====== ========== ================================= |
225 | 198 |
226 The ChIP peak data can be either the summit (in which case 'end' - 'start' = 1) or the | 199 If peak data is in ``bed`` format then the tool will automatically |
227 entire extent of the binding region (with 'start' and 'end' indicating the limits). | 200 assign the correct columns, otherwise the first three columns of data |
228 | 201 will be used. |
229 ------------- | 202 |
230 | 203 ------------- |
231 .. class:: infomark | 204 |
232 | 205 .. class:: infomark |
233 **Output** | 206 |
234 | 207 **Outputs** |
235 The outputs from this tool vary depending on the type of data that is input, however | 208 |
236 generally there is one tab-delimited results file for each analysis described above | 209 The key outputs from the tool are two lists comprising the nearest |
237 in the **What it does** section (some analyses output a second file with just the | 210 peaks for each gene, and the nearest gene for each peak (one dataset |
238 "best" hits). | 211 for each list). |
239 | 212 |
240 A history item will be generated for each output file, unless the option to put them | 213 There are two formats for reporting: "compact" and "full": |
241 into a single zip archive is selected; this archive file will have to be downloaded | 214 |
242 and unzipped on your local machine. It is recommended that you refer to the | 215 * **Compact output** reports all the hits for each peak or gene on |
243 RnaChipIntegrator documentation for information on the contents of each output file: | 216 a single line of output; |
244 https://github.com/fls-bioinformatics-core/RnaChipIntegrator/blob/master/doc/MANUAL.markdown | 217 * **Full output** reports each peak/gene pair on a separate line |
245 | 218 (i.e. a multi-line output format). |
246 In addition an Excel spreadsheet (with one page for each analysis performed) is always | 219 |
247 produced. | 220 In "full" output mode, additional options are available: |
221 | |
222 * The output files can be "padded" with extra (empty) lines to ensure | |
223 that there are always the same number of lines for each peak or | |
224 gene, if fewer than the requested number of hits are found. | |
225 * "Summary" datasets can also be requested, which include just the | |
226 nearest peak reported for each gene (and vice versa). | |
227 | |
228 In either mode these data will also be output in a single MS Excel file, | |
229 which contains one sheet per result set. | |
230 | |
231 .. class:: warning | |
232 | |
233 Using "compact" output with the number of hits limited to more than 4 | |
234 peak/gene pairs (or with no limit at all) can result in a large number | |
235 of columns in the output files, which in some versions of Galaxy will | |
236 not be properly displayed. However the data files themselves should be | |
237 okay. | |
238 | |
239 ------------- | |
240 | |
241 .. class:: infomark | |
242 | |
243 **More information** | |
244 | |
245 It is recommended that you refer to the ``RnaChipIntegrator`` | |
246 documentation for information on the contents of each output file: | |
247 | |
248 * http://rnachipintegrator.readthedocs.org/en/latest/ | |
248 | 249 |
249 ------------- | 250 ------------- |
250 | 251 |
251 .. class:: infomark | 252 .. class:: infomark |
252 | 253 |
257 developed by this group, and is documented at | 258 developed by this group, and is documented at |
258 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/ | 259 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/ |
259 | 260 |
260 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool. | 261 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool. |
261 </help> | 262 </help> |
263 <expand macro="citations" /> | |
262 </tool> | 264 </tool> |