comparison computeGCBias.xml @ 0:4409903dcb88 draft

planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit 0a9265a12a303b54cdaa974e82e87c2ac60962ee-dirty
author bgruening
date Mon, 25 Jan 2016 20:20:49 -0500
parents
children e74853730716
comparison
equal deleted inserted replaced
-1:000000000000 0:4409903dcb88
1 <tool id="deeptools_compute_gc_bias" name="computeGCBias" version="@WRAPPER_VERSION@.0">
2 <description>to see whether your samples should be normalized for GC bias</description>
3 <macros>
4 <token name="@BINARY@">computeGCBias</token>
5 <import>deepTools_macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
8 <command>
9 <![CDATA[
10 ln -s '$bamInput' local_bamInput.bam &&
11 ln -s '${bamInput.metadata.bam_index}' local_bamInput.bam.bai &&
12 ## The symlinks above give the BAM and its index fixed local names; presumably so the index is found next to the BAM (confirm).
13 @BINARY@
14 @THREADS@
15 --bamfile local_bamInput.bam
16 --GCbiasFrequenciesFile '$outFileName'
17 --fragmentLength $fragmentLength
18
19 @reference_genome_source@
20 ## With the "specific" option the separate effectiveGenomeSize value is passed; otherwise the option value itself.
21 #if $effectiveGenomeSize.effectiveGenomeSize_opt == "specific":
22 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize
23 #else:
24 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize_opt
25 #end if
26 ## --region is only emitted when the field is not blank.
27 #if str($region).strip() != '':
28 --region '$region'
29 #end if
30 ## Advanced options are only emitted when the user switched them on.
31 #if $advancedOpt.showAdvancedOpt == "yes":
32 --sampleSize '$advancedOpt.sampleSize'
33 --regionSize '$advancedOpt.regionSize'
34 ## Optional BED datasets: skipped when none was supplied. Paths are single-quoted like every other dataset path here.
35 #if $advancedOpt.filterOut:
36 --filterOut '$advancedOpt.filterOut'
37 #end if
38
39 #if $advancedOpt.extraSampling:
40 --extraSampling '$advancedOpt.extraSampling'
41 #end if
42 #end if
43 ## The bias plot is requested only when an image format other than "none" was selected.
44 #if str($image_format) != 'none':
45 --biasPlot '$outImageName'
46 --plotFileFormat $image_format
47 #end if
48 ]]>
49 </command>
50 <inputs> <!-- User-facing parameters; the names declared here are the variables referenced by the command template -->
51 <param name="bamInput" type="data" format="bam" label="BAM file"
52 help="The BAM file must be sorted."></param>
53 <!-- Parameter groups expanded from macros that are not defined in this file -->
54 <expand macro="reference_genome_source"/>
55 <expand macro="effectiveGenomeSize"/>
56 <expand macro="fragmentLength"/>
57 <expand macro="region_limit_operation"/>
58
59 <conditional name="advancedOpt"> <!-- the nested parameters exist only in the "yes" branch -->
60 <param type="select" name="showAdvancedOpt" label="Show advanced options">
61 <option selected="true" value="no">no</option>
62 <option value="yes">yes</option>
63 </param>
64 <when value="no"></when>
65 <when value="yes">
66 <param name="sampleSize" type="integer" min="1" value="50000000"
67 help="(--sampleSize)" label="Number of sampling points to consider"/>
68 <param name="regionSize" type="integer" min="1" value="300"
69 label="Region size"
70 help="To plot the reads per GC over a region, the size of the region is
71 required (see below for more details about the method). By default, the bin size
72 is set to 300 bases, which is close to the standard fragment size of many sequencing
73 applications. However, if the depth of sequencing is low, a larger bin size will
74 be required, otherwise many bins will not overlap with any read. (--regionSize)"/>
75 <param name="filterOut" type="data" optional="true" format="bed"
76 label="BED file containing genomic regions to be excluded from the estimation of the correction"
77 help="Such regions usually contain repetitive regions and peaks that, if included, would
78 bias the correction. It is recommended to filter out known repetitive regions if multi-reads
79 (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data,
80 it is recommended to first use a peak caller to identify and filter out the identified peaks. (--filterOut)"/>
81 <param name="extraSampling" type="data" optional="true" format="bed"
82 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
83 help="(--extraSampling)"/>
84 </when>
85 </conditional>
86 <param type="select" name="image_format"
87 label="GC bias plot"
88 help="If given, a diagnostic image summarizing the GC bias found on the sample will be created. (--plotFileFormat)">
89 <option value="none">No image</option>
90 <option selected="true" value="png">Image in png format</option>
91 <option value="pdf">Image in pdf format</option>
92 <option value="svg">Image in svg format</option>
93 <option value="eps">Image in eps format</option>
94 </param>
95 </inputs>
96 <outputs> <!-- one tabular result, plus an image that is only created on request -->
97 <data format="tabular" name="outFileName"/>
98 <data format="png" name="outImageName" label="${tool.name} GC-bias Plot">
99 <filter>
100 (
101 image_format != 'none'
102 )
103 </filter> <!-- the image dataset exists only when a format other than "none" was selected -->
104 <change_format> <!-- png is the declared default; the other selections switch the datatype -->
105 <when format="pdf" input="image_format" value="pdf"/>
106 <when format="svg" input="image_format" value="svg"/>
107 <when format="eps" input="image_format" value="eps"/>
108 </change_format>
109 </data>
110 </outputs>
111 <tests>
112 <test>
113 <param name="bamInput" value="paired_chr2L.bam" ftype="bam" />
114 <!-- image_format is set exactly once (below, to "none"): only the tabular output is asserted, so no plot is requested -->
115 <param name="showAdvancedOpt" value="yes" />
116 <param name="regionSize" value="1" />
117 <param name="ref_source" value="history" />
118 <param name="input1" value="sequence.2bit" />
119 <param name="sampleSize" value="10" />
120 <param name="effectiveGenomeSize_opt" value="specific" />
121 <param name="effectiveGenomeSize" value="23011544" />
122 <param name="region" value="chr2L" />
123 <param name="image_format" value="none" />
124 <output name="outFileName" file="computeGCBias_result1.tabular" ftype="tabular" />
125 </test>
126 </tests>
127 <help>
128 <![CDATA[
129 **What it does**
130
131 This tool computes the GC bias using the method proposed in Benjamini and Speed (2012) Nucleic Acids Res. (see below for further details).
132 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias.
133 There are two plots produced by the tool: a boxplot showing the absolute read numbers per GC-content bin and an x-y plot
134 depicting the ratio of observed/expected reads per GC-content bin.
135
136 -----
137
138 **Summary of the method used**
139
140 In order to estimate how many reads with what kind of GC content one should have sequenced, we first need to determine how many regions the
141 reference genome contains with each percentage of GC content, i.e. how many regions in the genome have 50% GC (or 10% GC or 90% GC or...).
142 We then sample a large number of equally sized genomic bins and count how many times we see a bin with 50% GC (or 10% GC or 90% or...). These EXPECTED values are independent of any
143 sequencing bias and are purely dependent on the underlying genome (i.e. they will most likely vary between mouse and fruit fly due to their genomes' different GC contents).
144 The OBSERVED values are based on the reads from the sequenced sample. Instead of noting how many genomic regions there are per GC content, we now count the reads per GC content.
145 In an ideal sample without GC bias, the ratio of OBSERVED/EXPECTED values should be close to 1 regardless of the GC content. Due to PCR (over)amplifications, the majority of ChIP samples
146 usually show a significant bias towards reads with high GC content (>50%).
147
148 .. image:: $PATH_TO_IMAGES/QC_GCplots_input.png
149
150
151 You can find more details on the computeGCBias doc page: https://deeptools.readthedocs.org/en/master/content/tools/computeGCBias.html
152
153
154 **Output files**:
155
156 - Diagnostic plot
157
158 - box plot of absolute read numbers per GC-content bin
159 - x-y plot of observed/expected read ratios per GC-content bin
160
161 - Data matrix
162
163 - to be used for GC correction with correctGCbias
164
165
166 -----
167
168 @REFERENCES@
169 ]]>
170 </help>
171 <expand macro="citations" />
172 </tool>