comparison computeMatrix.xml @ 1:275ed3e83de0 draft

planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit fef8b344925620444d93d8159c0b2731a5777920
author bgruening
date Mon, 15 Feb 2016 10:34:00 -0500
parents 14cb57237e46
children a842285199d9
comparison
equal deleted inserted replaced
0:14cb57237e46 1:275ed3e83de0
6 </macros> 6 </macros>
7 <expand macro="requirements" /> 7 <expand macro="requirements" />
8 <command> 8 <command>
9 <![CDATA[ 9 <![CDATA[
10 #import tempfile 10 #import tempfile
11 #set bw_files=[]
12 #for $counter, $bigwig in enumerate($scoreFileName):
13 ln -s "${bigwig}" "file_${counter}.bw" &&
14 #silent $bw_files.append('file_%s.bw' % $counter)
15 #end for
16
17 #set bed_files=[]
18 #for $counter, $rf in enumerate($regionsFiles):
19 ln -s "${rf.regionsFile}" "group_${counter}.bed" &&
20 #silent $bed_files.append('group_%s.bed' % $counter)
21 #end for
11 22
12 @BINARY@ 23 @BINARY@
13 24
14 $mode.mode_select 25 $mode.mode_select
15 --regionsFileName 26 --regionsFileName '#echo "' '".join($bed_files)#'
16 #for $rf in $regionsFiles: 27
17 '$rf.regionsFile' 28 --scoreFileName '#echo "' '".join($bw_files)#'
18 #end for 29
19 --scoreFileName
20 #for $bw in $scoreFileName:
21 '$bw'
22 #end for
23 --outFileName '$outFileName' 30 --outFileName '$outFileName'
24 31
25 @THREADS@ 32 @THREADS@
26 33
27 #if $output.showOutputSettings == "yes" 34 #if $output.showOutputSettings == "yes"
49 56
50 #if $advancedOpt.showAdvancedOpt == "yes": 57 #if $advancedOpt.showAdvancedOpt == "yes":
51 --sortRegions '$advancedOpt.sortRegions' 58 --sortRegions '$advancedOpt.sortRegions'
52 --sortUsing '$advancedOpt.sortUsing' 59 --sortUsing '$advancedOpt.sortUsing'
53 --averageTypeBins '$advancedOpt.averageTypeBins' 60 --averageTypeBins '$advancedOpt.averageTypeBins'
54 $advancedOpt.skipNAs
55 $advancedOpt.skipZeros 61 $advancedOpt.skipZeros
62 $advancedOpt.missingDataAsZero
56 --binSize $advancedOpt.binSize 63 --binSize $advancedOpt.binSize
57 64
58 #if $advancedOpt.minThreshold is not None and str($advancedOpt.minThreshold) != '': 65 #if $advancedOpt.minThreshold is not None and str($advancedOpt.minThreshold) != '':
59 --minThreshold $advancedOpt.minThreshold 66 --minThreshold $advancedOpt.minThreshold
60 #end if 67 #end if
75 help="File, in BED format, containing the regions to plot."/> 82 help="File, in BED format, containing the regions to plot."/>
76 </repeat> 83 </repeat>
77 84
78 <param name="scoreFileName" format="bigwig" type="data" 85 <param name="scoreFileName" format="bigwig" type="data"
79 label="Score file" multiple="True" 86 label="Score file" multiple="True"
80 help="You can generate a bigWig file from either a 87 help="You can generate a bigWig file from a BAM file using the
81 bedGraph or WIG file using UCSC tools or from a BAM file using the
82 bamCoverage tool. (--scoreFileName)"/> 88 bamCoverage tool. (--scoreFileName)"/>
83 89
84 <conditional name="mode" > 90 <conditional name="mode" >
85 <param name="mode_select" type="select" 91 <param name="mode_select" type="select"
86 label="computeMatrix has two main output options" 92 label="computeMatrix has two main output options"
87 help="In the scale-regions mode, all regions in the BED file are 93 help="In the scale-regions mode, all regions in the BED file are
88 stretched or shrunk to the same length (in bases) that is indicated 94 stretched or shrunken to the same length (in bases) that is indicated
89 by the user. Reference-point refers to a position within the BED 95 by the user. Reference-point refers to a position within the BED
90 regions (e.g start of region). In the reference-point mode only 96 regions (start or end of each region). In the reference-point mode only
91 those genomic positions before (upstream) and/or after (downstream) 97 those genomic positions before (upstream) and/or after (downstream)
92 the reference point will be considered."> 98 the reference point will be considered.">
93 <option value="scale-regions" selected="true">scale-regions</option> 99 <option value="scale-regions" selected="true">scale-regions</option>
94 <option value="reference-point">reference-point</option> 100 <option value="reference-point">reference-point</option>
95 </param> 101 </param>
162 <option value="sum">sum</option> 168 <option value="sum">sum</option>
163 <option value="std">std</option> 169 <option value="std">std</option>
164 </param> 170 </param>
165 171
166 <param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue="" checked="False" 172 <param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue="" checked="False"
167 label="Convert missing values to 0?." 173 label="Convert missing values to 0?"
168 help="If set to 'yes', missing values (NAs) are converted to 0. 174 help="If set to 'yes', missing values (NAs) are converted
169 The default is to ignore such cases, which will be 175 to 0. If you want to use clustering with plotHeatmap
170 depicted as black areas once a heatmap is created." /> 176 or plotProfile, set this to 'yes'.
177 The default is to ignore missing values, which will be
178 depicted as black areas once a heatmap is created.
179 (--missingDataAsZero)" />
171 180
172 <expand macro="skipZeros" /> 181 <expand macro="skipZeros" />
173 <expand macro="skipNAs" />
174 182
175 <param name="minThreshold" type="float" optional="True" 183 <param name="minThreshold" type="float" optional="True"
176 label="Minimum threshold" 184 label="Minimum threshold"
177 help="Any region containing a value that is equal or less than this numeric 185 help="Any region containing a value that is equal or less than this numeric
178 value will be skipped. This is useful to skip, for example, genes where the 186 value will be skipped. This is useful to skip, for example, genes where the
179 read count is zero for any of the bins. This could be the result of 187 read count is zero for any of the bins, which could be the result of
180 unmappable areas and can bias the overall results. (--minThreshold)"/> 188 unmappable areas and can bias the overall results. (--minThreshold)"/>
181 <param name="maxThreshold" type="float" optional="True" 189 <param name="maxThreshold" type="float" optional="True"
182 label="Maximum threshold" 190 label="Maximum threshold"
183 help="Any region containing a value that is equal or higher than this 191 help="Any region containing a value that is equal or higher than this
184 numeric value will be skipped. The max threshold is useful to skip those 192 numeric value will be skipped. The max threshold is useful to skip those
204 <param name="showAdvancedOpt" value="yes" /> 212 <param name="showAdvancedOpt" value="yes" />
205 <param name="mode_select" value="reference-point" /> 213 <param name="mode_select" value="reference-point" />
206 <param name="binSize" value="10" /> 214 <param name="binSize" value="10" />
207 <param name="sortUsing" value="sum" /> 215 <param name="sortUsing" value="sum" />
208 <param name="averageTypeBins" value="sum" /> 216 <param name="averageTypeBins" value="sum" />
209 <param name="skipNAs" value="False" />
210 <param name="beforeRegionStartLength" value="10" /> 217 <param name="beforeRegionStartLength" value="10" />
211 <param name="afterRegionStartLength" value="10" /> 218 <param name="afterRegionStartLength" value="10" />
212 <output name="outFileName" file="computeMatrix_result1.gz" ftype="deeptools_compute_matrix_archive" compare="sim_size" /> 219 <output name="outFileName" file="computeMatrix_result1.gz" ftype="deeptools_compute_matrix_archive" compare="sim_size" />
213 </test> 220 </test>
214 <test> 221 <test>
231 <output name="outFileName" file="computeMatrix_result3.gz" ftype="deeptools_compute_matrix_archive" compare="sim_size" /> 238 <output name="outFileName" file="computeMatrix_result3.gz" ftype="deeptools_compute_matrix_archive" compare="sim_size" />
232 </test> 239 </test>
233 </tests> 240 </tests>
234 <help> 241 <help>
235 <![CDATA[ 242 <![CDATA[
236 **What it does** 243
244 What it does
245 ----------------
237 246
238 This tool prepares an intermediate file (a gzipped table of values) 247 This tool prepares an intermediate file (a gzipped table of values)
239 that contains scores associated with genomic regions and can be used 248 that contains scores associated with genomic regions.
240 afterwards to plot a heatmap or profile. 249 The regions can either be scaled to the same size (using the ``scale-regions`` mode) or you can choose the start, end, or center of each region as the focus point for the score calculations.
241 250 For more details, check out the explanation `here <http://deeptools.readthedocs.org/en/latest/content/tools/computeMatrix.html#details>`_.
242 Genomic regions can really be anything - genes, parts of genes, ChIP-seq 251
243 peaks, favorite genome regions... as long as you provide a proper file 252 The intermediate file produced by ``computeMatrix`` is meant to be used with ``plotHeatmap`` and ``plotProfile``.
244 in BED or INTERVAL format. If you would like to compare different groups of regions 253 See the descriptions of ``plotHeatmap`` and ``plotProfile`` for example plots.
245 (i.e. genes from chromosome 2 and 3), you can supply more than 1 BED file, one for each group. 254
246 255 .. image:: $PATH_TO_IMAGES/computeMatrix_overview.png
247 computeMatrix can also be used to filter and sort
248 regions according to their score by making use of its advanced output options.
249
250
251 .. image:: $PATH_TO_IMAGES/flowChart_computeMatrixetc.png
252 :alt: Relationship between computeMatrix, heatmapper and profiler 256 :alt: Relationship between computeMatrix, heatmapper and profiler
253 257 :width: 600
254 258 :height: 418
255 You can find more details on the computeMatrix doc page: https://deeptools.readthedocs.org/en/master/content/tools/computeMatrix.html 259
256 260 =======
261
262 Usage hints
263 -------------
264
265 The supplied genomic regions can really be anything - genes, parts of genes, ChIP-seq peaks, favorite genome regions... as long as you provide a proper file
266 in BED or INTERVAL format. If you would like to compare different groups of regions (e.g., genes from chromosome 2 and 3), you can supply more than 1 regions file, one for each group by selecting "Insert Select regions".
267
268 .. image:: $PATH_TO_IMAGES/computeMatrix_selectRegions.png
269 :width: 600
270 :height: 150
271
272 You can select as many score (bigWig) files as you like. Simply use the Shift and/or Command key while clicking on the files of interest.
273
274 .. image:: $PATH_TO_IMAGES/computeMatrix_selectScores.png
275 :width: 600
276 :height: 136
277
278 The multitude of parameters can seem daunting at first - here are the options that we tend to tune most often:
279
280 * ``bin Size`` -- The default value works well most of the time, but if you want to have a more finely grained image, decrease the default value (but not smaller than your bigWig file(s)' bin size). If you want to reduce the computation time, increase it.
281 * ``Skip zeros`` -- useful to avoid completely blank lines in the heatmap.
282 * ``Convert missing values to 0?`` -- If you want to identify clusters of similar regions in an unsupervised fashion using ``plotHeatmap`` and/or ``plotProfile``, you should definitely set this to 'yes'.
283
284
285 Output files
286 ---------------
287
288 The default output is a **gzipped table of values** that is used by both ``plotHeatmap`` and ``plotProfile``.
289
290 The optional output files include a) the **regions after sorting and filtering (if selected)** as they were used to calculate the values for the plotting, and b) the uncompressed table that **underlies the heatmap**.
291
292 **TIP:** ``computeMatrix`` can also be used to filter and sort regions according to their score by making use of the "advanced output settings".
293
294 .. image:: $PATH_TO_IMAGES/computeMatrix_advancedOutput.png
295 :width: 600
296 :height: 189
297
298 .. image:: $PATH_TO_IMAGES/computeMatrix_output.png
299 :width: 600
300 :height: 297
301
302 Note that these advanced output options are available for ``plotHeatmap`` and ``plotProfile``, too.
303
304 See the following table for the optional output options:
305
306 +-----------------------------------+--------------------+-----------------+-----------------+
307 | **optional output type** | **computeMatrix** | **plotHeatmap** | **plotProfile** |
308 +-----------------------------------+--------------------+-----------------+-----------------+
309 | values underlying the heatmap | yes | yes | no |
310 +-----------------------------------+--------------------+-----------------+-----------------+
311 | values underlying the profile | no | no | yes |
312 +-----------------------------------+--------------------+-----------------+-----------------+
313 | sorted and/or filtered regions | yes | yes | yes |
314 +-----------------------------------+--------------------+-----------------+-----------------+
315
316 **More examples** can be found in our `Gallery <http://deeptools.readthedocs.org/en/latest/content/example_gallery.html#normalized-chip-seq-signals-and-peak-regions>`_.
257 317
258 ----- 318 -----
259 319
260 @REFERENCES@ 320 @REFERENCES@
261 ]]> 321 ]]>