changeset 28:f7712a057440 draft

new bugfix release
author bgruening
date Wed, 02 Apr 2014 09:15:44 -0400
parents bf1b1dcdd67b
children 3a2aab18a217
files bamCompare.xml bamCorrelate.xml bamCoverage.xml bamFingerprint.xml bigwigCompare.xml computeGCBias.xml computeMatrix.xml correctGCBias.xml deepTools_macros.xml heatmapper.xml profiler.xml readme.rst tool_dependencies.xml
diffstat 13 files changed, 76 insertions(+), 136 deletions(-) [+]
line wrap: on
line diff
--- a/bamCompare.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/bamCompare.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_bamCompare" name="bamCompare" version="1.0.5">
+<tool id="deeptools_bamCompare" name="bamCompare" version="@WRAPPER_VERSION@.0">
     <description>normalizes and compares two BAM files to obtain the ratio, log2ratio or difference. (bam2bigwig)</description>
     <expand macro="requirements" />
     <expand macro="stdio" />
@@ -191,15 +191,20 @@
 **What it does**
 
 This tool compares two BAM files based on the number of mapped reads. To
-compare the BAM files, the genome is partitioned into bins of equal size,
-the reads are counted for each bin and each BAM file and finally, a summarizing value is reported.
-This value can be the ratio of the number of reads per bin, the log2 of the ratio or the difference.
-This tool can normalize the number of reads on each BAM file using the SES method
-proposed by Diaz et al. (2012). Stat Appl Genet Mol Biol 11(3). Normalization based on read counts is also available. The
-output is either a bedGraph or a bigWig file containing the bin location and
-the resulting comparison values.
-If paired-end reads are present, the fragment
-length reported in the BAM file is used by default.
+compare the BAM files, the genome is partitioned into bins of equal size, then
+the number of reads found in each BAM file is counted for such bins and
+finally a summarizing value is reported. This value can be the ratio of the
+number of reads per bin, the log2 of the ratio or the difference. This tool
+can normalize the number of reads on each BAM file using the SES method
+proposed by Diaz et al. (2012). "Normalization, bias correction, and peak
+calling for ChIP-seq". Statistical applications in genetics and molecular
+biology, 11(3). Normalization based on read counts is also available. The
+output is either a bedgraph or a bigwig file containing the bin location and
+the resulting comparison values. By default, if reads are mated, the fragment
+length reported in the BAM file is used. In the case of paired-end mapping
+each read mate is treated independently to avoid a bias when a mixture of
+concordant and discordant pairs is present. This means that *each end* will be
+extended to match the fragment length.
 
 
 .. image:: $PATH_TO_IMAGES/norm_IGVsnapshot_indFiles.png
--- a/bamCorrelate.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/bamCorrelate.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_bamCorrelate" name="bamCorrelate" version="1.0.5">
+<tool id="deeptools_bamCorrelate" name="bamCorrelate" version="@WRAPPER_VERSION@.0">
     <description>correlates pairs of BAM files</description>
     <expand macro="requirements" />
     <expand macro="stdio" />
@@ -35,7 +35,9 @@
 
         #if $mode.modeOpt == "bins":
             --binSize '$mode.binSize'
-            --numberOfSamples '$mode.numberOfSamples'
+            --distanceBetweenBins '$mode.distanceBetweenBins'
+            $mode.doNotRemoveOutliers
+
         #else:
             --BED $mode.region_file
         #end if
@@ -74,8 +76,8 @@
             help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/>
 
         <param name="corMethod" type="select" label="Correlation method">
+            <option value="spearman" selected="True">Spearman</option>
             <option value="pearson">Pearson</option>
-            <option value="spearman">Spearman</option>
         </param>
 
         <conditional name="mode">
@@ -89,9 +91,28 @@
                     label="Bin size in bp"
                     help="Length in base pairs for a window used to sample the genome."/>
 
-                <param name="numberOfSamples" type="integer" value="100000" min="1" 
-                    label="Number of samples"
-                    help="Number of samples taken from the genome to compute the scaling factors"/>
+                <param name="distanceBetweenBins" type="integer" value="0" min="0"
+                    label="Distance between bins"
+                    help="By default, bamCorrelate considers consecutive bins of
+                        the specified 'Bin size'. However, to reduce the
+                        computation time, a larger distance between bins can
+                        be given. Larger distances result in fewer bins being
+                        considered"/>
+
+                <param name="doNotRemoveOutliers" type="boolean"
+                    truevalue="--doNotRemoveOutliers" falsevalue="" label="Do not filter outliers"
+                    help="By default, bins with very large counts are removed.
+                        By setting this option, outliers will not be
+                        removed. Bins with unusually large counts normally
+                        correspond to regions in the genome that accumulate
+                        a lot of reads, like satellite regions. If outliers are not
+                        removed, the Pearson correlation will wrongly report a
+                        very high correlation; that's why, by default,
+                        bamCorrelate tries to remove outliers using
+                        the median absolute deviation (MAD) method applying a
+                        threshold of 200 to only consider extremely large
+                        deviations from the median."/>
+
                 <expand macro="bamCorrelate_mode_actions" />
             </when>
             <when value="BED-file">
--- a/bamCoverage.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/bamCoverage.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_bamCoverage" name="bamCoverage" version="1.0.5">
+<tool id="deeptools_bamCoverage" name="bamCoverage" version="@WRAPPER_VERSION@.0">
     <description> generates a coverage bigWig file from a given BAM file.  Multiple options are available to count reads and normalize coverage. (bam2bigwig)</description>
     <expand macro="requirements" />
     <expand macro="stdio" />
@@ -133,11 +133,15 @@
 
 **What it does**
 
-Given a BAM file, this tool generates a bigWig or bedGraph file with genome-wide coverage of fragment or read coverages. 
-The way the method works is by first calculating all the number of reads (either extended to match the fragment length or not) 
-that overlap each bin (a region of fixed length, i.e. 25 bp) in the genome. Bins with zero counts are skipped, i.e. not added to the output file. 
-The resulting read counts can be normalized using either a given scaling factor, the RPKM formula or to get a 1x depth of coverage (RPGC).
-
+Given a BAM file, this tool generates a bigWig or bedGraph file of fragment or
+read coverages. The method works by first calculating the
+number of reads (either extended to match the fragment length or not) that
+overlap each bin in the genome. The resulting read counts can be normalized
+using either a given scaling factor, the RPKM formula or to get a 1x depth of
+coverage (RPGC). In the case of paired-end mapping each read mate is treated
+independently to avoid a bias when a mixture of concordant and discordant
+pairs is present. This means that *each end* will be extended to match the
+fragment length.
 
 .. image:: $PATH_TO_IMAGES/norm_IGVsnapshot_indFiles.png
 
--- a/bamFingerprint.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/bamFingerprint.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_bamFingerprint" name="bamFingerprint" version="1.0.5">
+<tool id="deeptools_bamFingerprint" name="bamFingerprint" version="@WRAPPER_VERSION@.0">
     <description>plots profiles of BAM files; useful for assesing ChIP signal strength</description>
     <expand macro="requirements" />
     <expand macro="stdio" />
--- a/bigwigCompare.xml	Mon Mar 17 16:23:58 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-<tool id="deeptools_bigwigCompare" name="bigwigCompare" version="1.0.5">
-    <description>normalizes and compares two bigWig files to obtain the ratio, log2ratio or difference</description>
-    <expand macro="requirements"/>
-    <expand macro="stdio" />
-    <macros>
-        <token name="@BINARY@">bigwigCompare</token>
-        <import>deepTools_macros.xml</import>
-    </macros>
-    <command>
-        bigwigCompare
-
-        @THREADS@
-
-        --bigwig1 '$bigwigFile1'
-        --bigwig2 '$bigwigFile2'
-
-        --outFileName '$outFileName'
-        --outFileFormat '$outFileFormat'
-
-        --ratio $comparison_type
-
-        #if str($region).strip() != '':
-            --region '$region'
-        #end if
-
-        #if $advancedOpt.showAdvancedOpt == "yes":
-
-          --missingDataAsZero $advancedOpt.missingDataAsZero
-          --scaleFactors '$advancedOpt.scaleFactor1:$advancedOpt.scaleFactor2'
-          --pseudocount '$advancedOpt.pseudocount'
-          --binSize $advancedOpt.binSize
-
-        #end if
-    </command>
-    <inputs>
-        <param name="bigwigFile1" format="bigwig" type="data" label="Treatment bigwig file" />
-        <param name="bigwigFile2" format="bigwig" type="data" label="bigWig file" />
-
-        <param name="comparison_type" type="select" 
-                label="How to compare the two files"
-                help="The reciprocal ratio returns the negative of the inverse of the ratio if the ratio is less than 0. The resulting values are interpreted as negative fold changes." >
-            <option value="log2" selected="true">log2 ratio</option>
-            <option value="ratio">simple ratio</option>
-            <option value="subtract">difference (subtract input from treatment)</option>
-            <option value="add">sum</option>
-            <option value="reciprocal_ratio">reciprocal ratio</option>
-        </param>
-
-        <param name="outFileFormat" type="select" label="Coverage file format">
-            <option value="bigwig" selected="true">bigwig</option>
-            <option value="bedgraph">bedgraph</option>
-        </param>
-
-        <expand macro="region_limit_operation" />
-
-        <conditional name="advancedOpt">
-            <param name="showAdvancedOpt" type="select" label="Show advanced options" >
-                <option value="no" selected="true">no</option>
-                <option value="yes">yes</option>
-            </param>
-            <when value="no" />
-            <when value="yes">
-                <param name="binSize" type="integer" value="50" min="1" 
-                    label="Bin size in bp"
-                    help="Size of the bins in bp for the output of the bigwig/bedgraph file "/>
-
-                <param name="missingDataAsZero" type="boolean" truevalue="yes" falsevalue="no" checked="True"
-                    label ="Treat missing data as zero"
-                    help  ="This parameter determines if missing data should be replaced with a zero. If set to &quot;no&quot;, missing data will be ignored and will not be included in the output file at all. Missing data is defined as those regions for which no value exists in *any* of the bigwig files. The decision to include or exclude missing data depends on the interpretation of the data. Missing data in a bigwig file may mean that there is no information available for certain regions, for example a repetitive region that is not being considered. In the same file regions with low coverage may get zero read counts. If missing data is replaced by zero, this would convert the excluded repetitive regions into regions of low coverage." />
-
-                <param name="scaleFactor1" type="float" value="1" label="Scale factor for treatment"/>
-                <param name="scaleFactor2" type="float" value="1" label="Scale factor for input"/>
-                <param name="pseudocount" type="float" value="1" label="Pseudocount" help="Small number to avoid dividing by zero."/>
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data format="bigwig" name="outFileName">
-        <change_format>
-            <when input="outFileFormat" value="bigwig" format="bigwig" />
-            <when input="outFileFormat" value="bedgraph" format="bedgraph" />
-        </change_format>
-        </data>
-    </outputs>
-
-  <help>
-
-**What it does**
-
-This tool compares two bigwig files based on the number of mapped reads. To
-compare the bigwig files the genome is partitioned into bins of equal size,
-then the number of reads found in each BAM file are counted for such bins and
-finally a summarizing value is reported. This value can be the ratio of the
-number of reads per bin, the log2 of the ratio, the sum or the difference.
-
-
------
-
-@REFERENCES@
-
-    </help>
-</tool>
--- a/computeGCBias.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/computeGCBias.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_computeGCBias" name="computeGCBias" version="1.0.5">
+<tool id="deeptools_computeGCBias" name="computeGCBias" version="@WRAPPER_VERSION@.0">
     <description>to see whether your samples should be normalized for GC bias</description>
     <expand macro="requirements" />
     <expand macro="stdio" />
--- a/computeMatrix.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/computeMatrix.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_computeMatrix" name="computeMatrix" version="1.0.5">
+<tool id="deeptools_computeMatrix" name="computeMatrix" version="@WRAPPER_VERSION@.0">
     <description>summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile</description>
     <expand macro="requirements" />
     <expand macro="stdio" />
--- a/correctGCBias.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/correctGCBias.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_correctGCBias" name="correctGCBias" version="1.0.5">
+<tool id="deeptools_correctGCBias" name="correctGCBias" version="@WRAPPER_VERSION@.0">
     <description>uses the output from computeGCBias to generate corrected BAM files</description>
     <expand macro="requirements" />
     <expand macro="stdio" />
--- a/deepTools_macros.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/deepTools_macros.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -44,13 +44,14 @@
     </xml>
     
     <token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token>
+    <token name="@WRAPPER_VERSION@">1.1</token>
     <xml name="requirements">
         <requirements>
             <requirement type="binary">@BINARY@</requirement>
             <requirement type="package" >samtools</requirement>
             <requirement type="package" >deepTools</requirement>
             <requirement type="package" >ucsc_tools</requirement>
-            <requirement type="package" version="1.5.4_43d3b51b3c3ab71dc07f31fefe6b3492226cc217">deepTools</requirement>
+            <requirement type="package" version="1.5.8_09023018b96c23f922aa2ea553090e9df8ecf41e">deepTools</requirement>
             <requirement type="package" version="0.1">ucsc_tools</requirement>
             <requirement type="package" version="1.7.1">numpy</requirement>
             <requirement type="package" version="0.7.7">pysam</requirement>
@@ -64,12 +65,11 @@
     </xml>
 
     <xml name="kmeans_clustering">
-
         <conditional name="used_multiple_regions">
             <param name="used_multiple_regions_options" type="select" 
-                label="Did you use multiple regions in computeMatrix?"
+                label="Did you compute the matrix with more than one group of regions?"
                 help="Would you like to cluster the regions according to the similarity of the signal distribution? This is only possible if you used computeMatrix on only one group of regions.">
-                <option value="yes">Yes, I used multiple regions.</option>
+                <option value="yes">Yes, I used multiple groups of regions</option>
                 <option value="no">No, I used only one region.</option>
             </param>
             <when value="no">
--- a/heatmapper.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/heatmapper.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_heatmapper" name="heatmapper" version="1.0.5">
+<tool id="deeptools_heatmapper" name="heatmapper" version="@WRAPPER_VERSION@.0">
     <description>creates a heatmap for a score associated to genomic regions</description>
     <expand macro="requirements"/>
     <expand macro="stdio" />
--- a/profiler.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/profiler.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="deeptools_profiler" name="profiler" version="1.0.5">
+<tool id="deeptools_profiler" name="profiler" version="@WRAPPER_VERSION@.0">
     <description>
         creates a profile plot for a score associated to genomic regions
     </description>
--- a/readme.rst	Mon Mar 17 16:23:58 2014 -0400
+++ b/readme.rst	Wed Apr 02 09:15:44 2014 -0400
@@ -25,6 +25,18 @@
 For support, questions, or feature requests contact: deeptools@googlegroups.com
 
 
+============
+Installation
+============
+
+Requirements: python-2.7
+
+Galaxy should be able to automatically install all other dependencies, such as numpy or scipy.
+
+For the best performance, we recommend installing BLAS/LAPACK/ATLAS in your environment before
+installing deepTools from the Tool Shed.
+
+
 ========
 Citation
 ========
--- a/tool_dependencies.xml	Mon Mar 17 16:23:58 2014 -0400
+++ b/tool_dependencies.xml	Wed Apr 02 09:15:44 2014 -0400
@@ -57,7 +57,7 @@
          <readme>The tools downloaded by this dependency definition are free for academic use. TODO: UCSC tools are only available with their latest version. That is not good for reproducibility.</readme>
      </package>
 
-    <package name="deepTools" version="1.5.4_43d3b51b3c3ab71dc07f31fefe6b3492226cc217">
+    <package name="deepTools" version="1.5.8_09023018b96c23f922aa2ea553090e9df8ecf41e">
         <install version="1.0">
             <actions>
                 <action type="shell_command">git clone --recursive https://github.com/fidelram/deepTools.git</action>
@@ -79,7 +79,7 @@
                         <package name="scipy" version="0.12.0" />
                     </repository>
                 </action>
-                <action type="shell_command">git reset --hard 43d3b51b3c3ab71dc07f31fefe6b3492226cc217</action>
+                <action type="shell_command">git reset --hard 09023018b96c23f922aa2ea553090e9df8ecf41e</action>
                 <action type="make_directory">$INSTALL_DIR/lib/python</action>
                 <action type="shell_command">
                     export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python &amp;&amp;