changeset 11:4c7ab9995f9e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit cc4c1c4131518b9cbf986a1f252767ff73ca938e
author iuc
date Sat, 07 Apr 2018 15:45:41 -0400
parents d7725c5596ab
children fa56d93f7980
files diffbind.R diffbind.xml test-data/DiffBind_analysis.RData test-data/out_analysis_info.txt test-data/out_binding.matrix test-data/out_diffbind.bed test-data/out_plots.pdf test-data/out_rscript.txt
diffstat 8 files changed, 475 insertions(+), 186 deletions(-) [+]
line wrap: on
line diff
--- a/diffbind.R	Tue Mar 20 04:51:25 2018 -0400
+++ b/diffbind.R	Sat Apr 07 15:45:41 2018 -0400
@@ -4,8 +4,9 @@
 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
 suppressPackageStartupMessages({
-	library('getopt')
-	library('DiffBind')
+    library('getopt')
+    library('DiffBind')
+    library('rjson')
 })
 
 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
@@ -14,15 +15,19 @@
 #get options, using the spec as defined by the enclosed list.
 #we read the options from the default: commandArgs(TRUE).
 spec = matrix(c(
-    'verbose', 'v', 2, "integer",
-    'help' , 'h', 0, "logical",
+    'infile' , 'i', 1, "character",
     'outfile' , 'o', 1, "character",
+    'scorecol', 'n', 1, "integer",
+    'lowerbetter', 'l', 1, "logical",
+    'summits', 's', 1, "integer",
+    'th', 't', 1, "double",
+    'format', 'f', 1, "character",
     'plots' , 'p', 2, "character",
-    'infile' , 'i', 1, "character",
-    'format', 'f', 1, "character",
-    'th', 't', 1, "double",
     'bmatrix', 'b', 0, "logical",
-    "rdaOpt", "r", 0, "logical"
+    "rdaOpt", "r", 0, "logical",
+    'infoOpt' , 'a', 0, "logical",
+    'verbose', 'v', 2, "integer",
+    'help' , 'h', 0, "logical"
 ), byrow=TRUE, ncol=4);
 
 opt = getopt(spec);
@@ -34,31 +39,81 @@
     q(status=1);
 }
 
-if ( !is.null(opt$plots) ) {
-    pdf(opt$plots)
+parser <- newJSONParser()
+parser$addData(opt$infile)
+factorList <- parser$getObject()
+filenamesIn <- unname(unlist(factorList[[1]][[2]]))
+peaks <- filenamesIn[grepl("peaks.bed", filenamesIn)]
+bams <- filenamesIn[grepl("bamreads.bam", filenamesIn)]
+ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn)]
+
+# get the group and sample id from the peaks filenames
+groups <- sapply(strsplit(peaks,"-"), `[`, 1)
+samples <- sapply(strsplit(peaks,"-"), `[`, 2)
+
+if ( length(ctrls) != 0 ) {
+    sampleTable <- data.frame(SampleID=samples,
+                        Condition=groups,
+                        bamReads=bams,
+                        bamControl=ctrls,
+                        Peaks=peaks,
+                        Tissue=samples, # using "Tissue" column to display ids as labels in PCA plot
+                        stringsAsFactors=FALSE)
+} else {
+    sampleTable <- data.frame(SampleID=samples,
+                        Replicate=samples,
+                        Condition=groups,
+                        bamReads=bams,
+                        Peaks=peaks,
+                        Tissue=samples,
+                        stringsAsFactors=FALSE)
 }
 
-sample = dba(sampleSheet=opt$infile, peakFormat='bed')
-sample_count = dba.count(sample)
+sample = dba(sampleSheet=sampleTable, peakFormat='bed', scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter)
+
+if ( !is.null(opt$summits) ) {
+    sample_count = dba.count(sample, summits=opt$summits)
+} else {
+    sample_count = dba.count(sample)
+}
+
 sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2)
 sample_analyze = dba.analyze(sample_contrast)
-diff_bind = dba.report(sample_analyze)
-orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE)
-dev.off()
+diff_bind = dba.report(sample_analyze, th=opt$th)
 
+# Generate plots
+if ( !is.null(opt$plots) ) {
+    pdf(opt$plots)
+    orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th)
+    dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3)
+    dba.plotMA(sample_analyze, th=opt$th)
+    dba.plotVolcano(sample_analyze, th=opt$th)
+    dba.plotBox(sample_analyze, th=opt$th)
+    dev.off()
+}
+
+# Output differential binding sites
 resSorted <- diff_bind[order(diff_bind$FDR),]
-write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE)
+write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE)
 
 # Output binding affinity scores
 if (!is.null(opt$bmatrix)) {
     bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)
-    write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
+    write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE)
 }
 
-## Output RData file
-
+# Output RData file
 if (!is.null(opt$rdaOpt)) {
     save.image(file = "DiffBind_analysis.RData")
 }
 
-sessionInfo()
+# Output analysis info
+if (!is.null(opt$infoOpt)) {
+    info <- "DiffBind_analysis_info.txt"
+    cat("dba.count Info\n\n", file=info, append = TRUE)
+    capture.output(sample, file=info, append=TRUE)
+    cat("\ndba.analyze Info\n\n", file=info, append = TRUE)
+    capture.output(sample_analyze, file=info, append=TRUE)
+    cat("\nSessionInfo\n\n", file=info, append = TRUE)
+    capture.output(sessionInfo(), file=info, append=TRUE)
+}
\ No newline at end of file
--- a/diffbind.xml	Tue Mar 20 04:51:25 2018 -0400
+++ b/diffbind.xml	Sat Apr 07 15:45:41 2018 -0400
@@ -1,8 +1,9 @@
-<tool id="diffbind" name="DiffBind" version="2.6.6.0">
+<tool id="diffbind" name="DiffBind" version="2.6.6.1">
     <description> differential binding analysis of ChIP-Seq peak data</description>
     <requirements>
         <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement>
         <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="0.2.15">r-rjson</requirement>
     </requirements>
     <stdio>
         <regex match="Execution halted"
@@ -19,69 +20,112 @@
            description="An undefined error occured, please check your intput carefully and contact your administrator." />
     </stdio>
     <version_command><![CDATA[
-echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
     ]]></version_command>
     <command><![CDATA[
-        ## seems that diffbind also needs file extensions to work properly
-        #set $counter = 1
-        #for $sample in $samples:
-            ln -s $sample.bamreads #echo str($counter) + "_bamreads.bam"# &&
-            ln -s ${sample.bamreads.metadata.bam_index} #echo str($counter) + "_bamreads.bai"# &&
-            #if str( $sample.bamcontrol ) != 'None':
-                ln -s $sample.bamcontrol #echo str($counter) + "_bamcontrol.bam"# &&
-                ln -s ${sample.bamcontrol.metadata.bam_index} #echo str($counter) + "_bamcontrol.bai"# &&
-            #end if
-            #set $counter = $counter + 1
+#import re
+#import json
+
+## Adapted from DESeq2 wrapper
+#set $temp_factor_names = list()
+#set $temp_factor = list()
+
+#for $g in $rep_group:
+
+    #set $peak_files = list()
+    #set $bam_files = list()
+    #set $bam_controls = list()
+
+    #for $file in $g.peaks:
+        #set $file_name = re.sub('[^\w\-\s]', '_', str($file.element_identifier))
+        ln -s '${file}' ${g.groupName}-${file_name}-peaks.bed &&
+        $peak_files.append(str($g.groupName) + '-' + $file_name + '-peaks.bed')
+    #end for
+
+    #for $bam in $g.bamreads:
+        #set $bam_name = re.sub('[^\w\-\s]', '_', str($bam.element_identifier))
+        ln -s '${bam}' ${bam_name}-bamreads.bam &&
+        ln -s ${bam.metadata.bam_index} ${bam_name}-bamreads.bai &&
+        $bam_files.append($bam_name + '-bamreads.bam')
+    #end for
+
+    $temp_factor.append( {str($g.groupName): $peak_files} )
+    $temp_factor.append( {str($g.groupName): $bam_files} )
+
+    #if str( $g.bamcontrol ) != 'None':
+        #for $ctrl in $g.bamcontrol:
+            #set $ctrl_name = re.sub('[^\w\-\s]', '_', str($ctrl.element_identifier))
+            ln -s '${ctrl}' ${g.groupName}-${ctrl_name}-bamcontrol.bam &&
+            ln -s ${ctrl.metadata.bam_index} ${g.groupName}-${ctrl_name}-bamcontrol.bai &&
+            $bam_controls.append(str($g.groupName) + '-' + $ctrl_name + '-bamcontrol.bam')
         #end for
+        $temp_factor.append( {str($g.groupName): $bam_controls} )
+    #end if
 
-        Rscript '$__tool_directory__/diffbind.R'
-            -i $infile
-            -o '$outfile'
-            -t $th
-            -f $out.format
-            -p '$plots'
+#end for
+
+$temp_factor.reverse()
+$temp_factor_names.append([str($factorName), $temp_factor])
+
+
+Rscript '$__tool_directory__/diffbind.R'
+
+    -i '#echo json.dumps(temp_factor_names)#'
+    -o '$outfile'
+    -t $th
+    -f $out.format
+    -p '$plots'
 
-            #if $out.binding_matrix:
-                -b
-            #end if
+    #if $scorecol:
+        -n "$scorecol"
+    #end if
+    #if $lowerbetter:
+        -l "$lowerbetter"
+    #end if
+    #if $summits:
+        -s "$summits"
+    #end if
 
-            #if $out.rdata:
-                -r
-            #end if
+    #if $out.binding_matrix:
+        -b
+    #end if
+
+    #if $out.rdata:
+        -r
+    #end if
+
+    #if $out.analysis_info:
+        -a
+    #end if
+
+    #if $out.rscript:
+        && cp '$__tool_directory__/diffbind.R' '$rscript'
+    #end if
 ]]>
     </command>
-    <configfiles>
-<configfile name="infile"><![CDATA[
-#set $counter = 1
-#for $sample in $samples:
-#if str( $sample.bamcontrol ) != 'None' and $counter == 1:
-SampleID,Tissue,Factor,Condition,Replicate,bamReads,bamControl,Peaks
-#elif $counter == 1:
-SampleID,Tissue,Factor,Condition,Replicate,bamReads,Peaks
-#end if
-#if str( $sample.bamcontrol ) != 'None':
-$sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,#echo str($counter) + '_bamcontrol.bam'#,$sample.peaks
-#else:
-$sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,$sample.peaks
-#end if
-#set $counter = $counter + 1
-#end for]]></configfile>
-    </configfiles>
     <inputs>
-        <repeat name="samples" title="Samples" min="4">
-            <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" />
-            <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" />
-            <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" />
-            <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" />
-            <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" />
-            <param name="bamreads" type="data" format="bam" label="Read BAM file" help="Specify the Read BAM file, used for Peak calling."/>
-            <param name="bamcontrol" type="data" format="bam" optional="True" label="Control BAM file" help="If specifying a control BAM file for this sample, then all samples are required to specify one."/>
-            <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/>
+        <param name="factorName" type="text" label="Name" help="Name of experiment factor of interest (e.g. Condition). One factor must be entered and there must be two or more groups. NOTE: Please only use letters, numbers or underscores.">
+        <sanitizer>
+            <valid initial="string.letters,string.digits"><add value="_" /></valid>
+        </sanitizer>
+        </param>
+        <repeat name="rep_group" title="Group" min="2" default="2">
+            <param name="groupName" type="text" label="Name"
+            help="Name of group that the peak files belong to (e.g. Resistant or Responsive). NOTE: Please only use letters, numbers or underscores (case sensitive).">
+            <sanitizer>
+                <valid initial="string.letters,string.digits"><add value="_" /></valid>
+            </sanitizer>
+            </param>
+            <param name="peaks" type="data" format="bed" multiple="true" label="Peak files" help="Result of your Peak calling experiment"/>
+            <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM file" help="Specify the Read BAM file used for Peak calling."/>
+            <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM file" help="If specifying a control BAM file, all samples are required to specify one."/>
         </repeat>
-        <param name="th" type="float" value="1" min="0" max="1"
-                label="FDR Threshold"
-                help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/>
-        
+
+        <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores.  Default: 8 (narrowPeak)"/>
+        <param name="lowerbetter" type="boolean" truevalue="True" falsevalue="" checked="False" label="Lower score is better?" help="DiffBind by default assumes that a higher score indicates a better peak, for example narrowPeaks -log10pvalue. If this is not the case, for example if the score is a p-value or FDR, set this option to Yes. Default: No" />
+        <param name="summits" type="integer" min="0" optional="True" label="Summits" help="Extend peaks Nbp up- and downstream of the summit. For punctate peaks it is advisable to extend (e.g. 250bp), see the DiffBind User Guide"/>
+        <param name="th" type="float" value="0.05" min="0" max="1" label="FDR Threshold" help="Significance threshold; all sites with FDR less than or equal to this value will be included in the output. A value of 1 will output all binding sites. Default: 0.05"/>
+
         <!-- Output Options -->
         <section name="out" expanded="false" title="Output Options">
             <param name="format" type="select" label="Output Format">
@@ -91,8 +135,9 @@
             </param>
             <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" />
             <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" />
-            <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No">
-            </param>
+            <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"/>
+            <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/>
+            <param name="analysis_info" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output analysis info?" help="If this option is set to Yes, information from the dba.count and dba.analyze commmands will be output in a text file. Default: No"/>
         </section>
     </inputs>
 
@@ -112,53 +157,43 @@
         <data name="rdata" format="rdata" from_work_dir="DiffBind_analysis.RData" label="${tool.name} on ${on_string}: RData file">
             <filter>out['rdata']</filter>
         </data>
+        <data name="rscript" format="txt" label="${tool.name} on ${on_string}: Rscript">
+            <filter>out['rscript']</filter>
+        </data>
+        <data name="analysis_info" format="txt" from_work_dir="DiffBind_analysis_info.txt" label="${tool.name} on ${on_string}: Analysis info">
+            <filter>out['analysis_info']</filter>
+        </data>
     </outputs>
 
     <tests>
-        <test expect_num_outputs="4">
-            <repeat name="samples">
-                <param name="sample_id" value="BT4741" />
-                <param name="tissue" value="BT474" />
-                <param name="factor" value="ER" />
-                <param name="condition" value="Resistant" />
-                <param name="replicate" value="1" />
-                <param name="bamreads" ftype="bam" value="BT474_ER_1.bam" />
-                <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz" />
-            </repeat>
-            <repeat name="samples">
-                <param name="sample_id" value="BT4742" />
-                <param name="tissue" value="BT474" />
-                <param name="factor" value="ER" />
-                <param name="condition" value="Resistant" />
-                <param name="replicate" value="2" />
-                <param name="bamreads" ftype="bam" value="BT474_ER_2.bam" />
-                <param name="peaks" ftype="bed" value="BT474_ER_2.bed.gz" />
+        <test expect_num_outputs="6">
+            <param name="factorName" value="Condition"/>
+            <repeat name="rep_group">
+                <param name="groupName" value="Resistant"/>
+                <param name="peaks" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
+                <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
             </repeat>
-            <repeat name="samples">
-                <param name="sample_id" value="MCF71" />
-                <param name="tissue" value="MCF7" />
-                <param name="factor" value="ER" />
-                <param name="condition" value="Responsive" />
-                <param name="replicate" value="1" />
-                <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam" />
-                <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz" />
+            <repeat name="rep_group">
+                <param name="groupName" value="Responsive"/>
+                <param name="peaks" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
+                <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
             </repeat>
-            <repeat name="samples">
-                <param name="sample_id" value="MCF72" />
-                <param name="tissue" value="MCF7" />
-                <param name="factor" value="ER" />
-                <param name="condition" value="Responsive"  />
-                <param name="replicate" value="2" />
-                <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" />
-                <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" />
-            </repeat>
+            <param name="scorecol" value="5" />
             <param name="pdf" value="True" />
             <param name="binding_matrix" value="True" />
             <param name="rdata" value="True" />
+            <param name="rscript" value="True"/>
+            <param name="analysis_info" value="True"/>
             <output name="outfile" value="out_diffbind.bed" />
             <output name="plots" value="out_plots.pdf" compare="sim_size" />
             <output name="binding_matrix" value="out_binding.matrix" />
             <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/>
+            <output name="rscript" value="out_rscript.txt"/>
+            <output name="analysis_info" value="out_analysis_info.txt" compare="sim_size" >
+                <assert_contents>
+                    <has_text text="SessionInfo"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
@@ -190,11 +225,7 @@
 as well as comparing the results of an occupancy-based analysis with an affinity-based one.
 Finally, certain technical aspects of the how these analyses are accomplished are detailed.
 
-Note DiffBind requires a minimum of four samples (two groups with two replicates each).
-
-.. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
-.. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
-.. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf
+Note this DiffBind tool requires a minimum of four samples (two groups with two replicates each).
 
 -----
 
@@ -210,36 +241,29 @@
 
 **Sample Information**
 
-You have to specify your sample information in the tool form above, where Condition contains the groups you want to compare.
+You have to specify your sample information in the tool form above, where Factor is the groups you want to compare (e.g Resistant and Responsive).
 
 Example:
 
-    ============= ========== ========== ============= =============
-     **SampleID** **Tissue** **Factor** **Condition** **Replicate**
-    ------------- ---------- ---------- ------------- -------------
-    BT4741        BT474      ER         Resistant     1            
-    BT4742        BT474      ER         Resistant     2            
-    MCF71         MCF7       ER         Responsive    1            
-    MCF72         MCF7       ER         Responsive    2            
-    MCF73         MCF7       ER         Responsive    3            
-    T47D1         T47D       ER         Responsive    1            
-    T47D2         T47D       ER         Responsive    2            
-    MCF7r1        MCF7       ER         Resistant     1            
-    MCF7r2        MCF7       ER         Resistant     2            
-    ZR751         ZR75       ER         Responsive    1            
-    ZR752         ZR75       ER         Responsive    2            
-    ============= ========== ========== ============= =============
+    ============= =============
+     **SampleID** **Group**
+    ------------- -------------
+    BT4741        Resistant
+    BT4742        Resistant
+    MCF71         Responsive
+    MCF72         Responsive
+    ============= =============
 
 
 **Peak files**
 
-Result of your Peak calling experiment in bed format, one file for each sample is required.
+Result of your Peak calling experiment in bed format, one file for each sample is required. The peak caller, format and score column can be specified in the tool form above. The default settings expect narrowPeak bed format, which has the score in the 8th column (-log10pvalue), and can be output from MACS2.
 
-Example:
+Example (MACS.xls file in bed format):
 
-    ======= ======= ======= =============== =======
-    1          2      3          4           **5**
-    ======= ======= ======= =============== =======
+    ======= ======= ======= =============== ==============
+    1          2      3          4           **5 (Score)**
+    ======= ======= ======= =============== ==============
     chr18   215562  216063  MACS_peak_16037 56.11
     chr18   311530  312105  MACS_peak_16038 222.49
     chr18   356656  357315  MACS_peak_16039 92.06
@@ -250,10 +274,10 @@
     chr18   503518  504552  MACS_peak_16044 818.30
     chr18   531672  532274  MACS_peak_16045 159.30
     chr18   568326  569282  MACS_peak_16046 601.11
-    ======= ======= ======= =============== =======
+    ======= ======= ======= =============== ==============
 
-* BAM file which contains the mapped sequencing reads can be associated with each peakset
-* Control BAM file represents a control dataset and are optional, but have to specified for all when used.
+* BAM file which contains the mapped sequencing reads associated with each peakset, one file for each sample is required.
+* Optional: Control BAM file representing a control dataset. If used, has to be specified for all samples. Note that the DiffBind authors say control reads are best utilized prior to running DiffBind, at the peak calling stage (e.g. with MACS2) and in blacklists, see this `Bioconductor post`_.
 
 -----
 
@@ -265,9 +289,11 @@
 
 Optionally, under **Output Options** you can choose to output
 
-    * a correlation heatmap plot
+    * a PDF of plots (Heatmap, PCA, MA, Volcano, Boxplots)
     * a binding affinity matrix
-    * an RData file
+    * the R script used by this tool
+    * an RData file of the R objects generated
+    * a text file with information on the analysis (number of Intervals, FriP scores, method used)
 
 **Differentially Bound Sites**
 
@@ -275,15 +301,15 @@
 
 Example - BED format:
 
-    =====  ======  ======  ===== ====  ====    ====    ====    =====   ========    ========
-    1      2       3       4     5     6       7       8       9       10          **11**
-    =====  ======  ======  ===== ====  ====    ====    ====    =====   ========    ========
-    chr18  394600  396513  1914    *   7.15    7.89    5.55    2.35    7.06e-24    9.84e-21
-    chr18  111567  112005  439     *   5.71    3.63    6.53    -2.89   1.27e-08    8.88e-06
-    chr18  346464  347342  879     *   5       3.24    5.77    -2.52   6.51e-06    0.00303
-    chr18  399014  400382  1369    *   7.62    8.05    7       1.04    1.04e-05    0.00364
-    chr18  371110  372102  993     *   4.63    5.36    3.07    2.3     8.1e-05     0.0226
-    =====  ======  ======  ===== ====  ====    ====    ====    =====   ========    ========
+    ========  ======  ======  =====  ======  =====  ===============  ==============  =======   ========  ========
+    seqnames  start   end     width  strand  Conc   Conc_Responsive  Conc_Resistant  Fold      p.value   **FDR**
+    ========  ======  ======  =====  ======  =====  ===============  ==============  =======   ========  ========
+    chr18     394600  396513  1914    *      7.15   5.55             7.89            -2.35     7.06e-24  9.84e-21
+    chr18     111567  112005  439     *      5.71   6.53             3.63            2.89      1.27e-08  8.88e-06
+    chr18     346464  347342  879     *      5      5.77             3.24            2.52      6.51e-06  0.00303
+    chr18     399014  400382  1369    *      7.62   7                8.05            -1.04     1.04e-05  0.00364
+    chr18     371110  372102  993     *      4.63   3.07             5.36            -2.3      8.1e-05   0.0226
+    ========  ======  ======  =====  ======  =====  ===============  ==============  =======   ========  ========
 
     Columns contain the following data:
 
@@ -307,21 +333,20 @@
 
 Example:
 
-    ====== ====== ====== ========== ========== ========= ====== ========= ====
-    ID     Tissue Factor Condition  Treatment  Replicate Caller Intervals FRiP
-    ====== ====== ====== ========== ========== ========= ====== ========= ====
-    BT4741 BT474  ER     Resistant  Full-Media 1         counts 2845      0.16
-    BT4742 BT474  ER     Resistant  Full-Media 2         counts 2845      0.15
-    MCF71  MCF7   ER     Responsive Full-Media 1         counts 2845      0.27
-    MCF72  MCF7   ER     Responsive Full-Media 2         counts 2845      0.17
-    MCF73  MCF7   ER     Responsive Full-Media 3         counts 2845      0.23
-    T47D1  T47D   ER     Responsive Full-Media 1         counts 2845      0.10
-    T47D2  T47D   ER     Responsive Full-Media 2         counts 2845      0.06
-    MCF7r1 MCF7   ER     Resistant  Full-Media 1         counts 2845      0.20
-    MCF7r2 MCF7   ER     Resistant  Full-Media 2         counts 2845      0.13
-    ZR751  ZR75   ER     Responsive Full-Media 1         counts 2845      0.32
-    ZR752  ZR75   ER     Responsive Full-Media 2         counts 2845      0.22
-    ====== ====== ====== ========== ========== ========= ====== ========= ====
+    =====  ======  ======  ================  ================  ================  ================
+    CHR    START   END     MCF7_ER_1.bed     MCF7_ER_2.bed     BT474_ER_1.bed    BT474_ER_2.bed
+    =====  ======  ======  ================  ================  ================  ================
+    chr18  111567  112005  137.615208000375  59.878372946728   29.4139375878664  19.9594576489093
+    chr18  189223  189652  19.9594576489093  12.6059732519427  11.5554754809475  23.110950961895
+    chr18  215232  216063  11.5554754809475  15.7574665649284  31.5149331298568  72.4843461986707
+    chr18  311530  312172  17.8584621069189  11.5554754809475  54.6258840917518  43.0704086108043
+    chr18  346464  347342  75.6358395116564  40.9694130688139  21.0099554199046  16.8079643359236
+    chr18  356560  357362  11.5554754809475  14.7069687939332  57.7773774047375  53.5753863207566
+    chr18  371110  372102  8.40398216796182  9.45447993895705  81.9388261376278  82.989323908623
+    chr18  394600  396513  56.7268796337423  43.0704086108043  510.541916703681  438.05757050501
+    chr18  399014  400382  156.524167878289  117.655750351465  558.864814169461  496.885445680743
+    chr18  498906  500200  767.913870597511  278.381909313735  196.443083176108  181.736114382174
+    =====  ======  ======  ================  ================  ================  ================
 
 -----
 
@@ -392,7 +417,7 @@
 of reads within differentially bound sites corresponding to whether they gain or
 lose affinity between the two sample groups. A reporting mechanism enables differentially
 bound sites to be extracted for further processing, such as annotation, motif, and
-pathway analyses. *Note that currently only the correlation plot is implemented in this Galaxy tool.*
+pathway analyses.
 
 -----
 
@@ -401,6 +426,11 @@
 DiffBind Authors:  Rory Stark, Gordon Brown (2011)
 Wrapper authors: Bjoern Gruening, Pavankumar Videm
 
+.. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
+.. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
+.. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf
+.. _`Bioconductor post`: https://support.bioconductor.org/p/69924/
+
 ]]>
     </help>
     <citations>
Binary file test-data/DiffBind_analysis.RData has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_analysis_info.txt	Sat Apr 07 15:45:41 2018 -0400
@@ -0,0 +1,83 @@
+dba.count Info
+
+4 Samples, 1394 sites in matrix (2141 total):
+              ID         Tissue  Condition Caller Intervals
+1  MCF7_ER_1_bed  MCF7_ER_1_bed Responsive   macs      1556
+2  MCF7_ER_2_bed  MCF7_ER_2_bed Responsive   macs      1046
+3 BT474_ER_1_bed BT474_ER_1_bed  Resistant   macs      1080
+4 BT474_ER_2_bed BT474_ER_2_bed  Resistant   macs      1122
+
+dba.analyze Info
+
+4 Samples, 1394 sites in matrix:
+              ID         Tissue  Condition Caller Intervals FRiP
+1  MCF7_ER_1_bed  MCF7_ER_1_bed Responsive counts      1394 0.38
+2  MCF7_ER_2_bed  MCF7_ER_2_bed Responsive counts      1394 0.22
+3 BT474_ER_1_bed BT474_ER_1_bed  Resistant counts      1394 0.27
+4 BT474_ER_2_bed BT474_ER_2_bed  Resistant counts      1394 0.25
+
+1 Contrast:
+      Group1 Members1    Group2 Members2 DB.DESeq2
+1 Responsive        2 Resistant        2         5
+
+SessionInfo
+
+R version 3.4.1 (2017-06-30)
+Platform: x86_64-apple-darwin14.5.0 (64-bit)
+Running under: OS X El Capitan 10.11.6
+
+Matrix products: default
+BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
+LAPACK: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libLAPACK.dylib
+
+locale:
+[1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_US.UTF-8
+
+attached base packages:
+[1] parallel  stats4    methods   stats     graphics  grDevices utils    
+[8] datasets  base     
+
+other attached packages:
+ [1] bindrcpp_0.2               rjson_0.2.15              
+ [3] DiffBind_2.6.6             SummarizedExperiment_1.8.0
+ [5] DelayedArray_0.4.1         matrixStats_0.52.2        
+ [7] Biobase_2.38.0             GenomicRanges_1.30.3      
+ [9] GenomeInfoDb_1.14.0        IRanges_2.12.0            
+[11] S4Vectors_0.16.0           BiocGenerics_0.24.0       
+[13] getopt_1.20.0             
+
+loaded via a namespace (and not attached):
+  [1] Category_2.44.0          bitops_1.0-6             bit64_0.9-5             
+  [4] RColorBrewer_1.1-2       progress_1.1.2           httr_1.3.1              
+  [7] Rgraphviz_2.22.0         tools_3.4.1              backports_1.0.5         
+ [10] R6_2.2.2                 rpart_4.1-13             KernSmooth_2.23-15      
+ [13] Hmisc_4.0-3              DBI_0.8                  lazyeval_0.2.1          
+ [16] colorspace_1.3-2         nnet_7.3-12              gridExtra_2.3           
+ [19] DESeq2_1.18.1            prettyunits_1.0.2        bit_1.1-12              
+ [22] compiler_3.4.1           sendmailR_1.2-1          graph_1.56.0            
+ [25] htmlTable_1.9            labeling_0.3             rtracklayer_1.38.0      
+ [28] caTools_1.17.1           scales_0.5.0             checkmate_1.8.2         
+ [31] BatchJobs_1.6            genefilter_1.60.0        RBGL_1.54.0             
+ [34] stringr_1.3.0            digest_0.6.12            Rsamtools_1.30.0        
+ [37] foreign_0.8-67           AnnotationForge_1.20.0   XVector_0.18.0          
+ [40] htmltools_0.3.6          base64enc_0.1-3          pkgconfig_2.0.1         
+ [43] limma_3.34.6             htmlwidgets_1.0          rlang_0.2.0             
+ [46] RSQLite_2.0              BBmisc_1.11              bindr_0.1.1             
+ [49] GOstats_2.44.0           hwriter_1.3.2            BiocParallel_1.12.0     
+ [52] gtools_3.5.0             acepack_1.4.1            dplyr_0.7.4             
+ [55] RCurl_1.95-4.8           magrittr_1.5             Formula_1.2-1           
+ [58] GO.db_3.5.0              GenomeInfoDbData_0.99.1  Matrix_1.2-12           
+ [61] Rcpp_0.12.15             munsell_0.4.3            stringi_1.1.6           
+ [64] edgeR_3.20.7             zlibbioc_1.24.0          gplots_3.0.1            
+ [67] fail_1.3                 plyr_1.8.4               grid_3.4.1              
+ [70] blob_1.1.1               ggrepel_0.7.0            gdata_2.18.0            
+ [73] lattice_0.20-34          Biostrings_2.46.0        splines_3.4.1           
+ [76] GenomicFeatures_1.28.5   annotate_1.56.0          locfit_1.5-9.1          
+ [79] knitr_1.20               pillar_1.2.1             systemPipeR_1.12.0      
+ [82] geneplotter_1.56.0       biomaRt_2.34.2           glue_1.2.0              
+ [85] XML_3.98-1.6             ShortRead_1.36.0         latticeExtra_0.6-28     
+ [88] data.table_1.10.4        gtable_0.2.0             amap_0.8-14             
+ [91] assertthat_0.2.0         ggplot2_2.2.1            xtable_1.8-2            
+ [94] survival_2.40-1          tibble_1.4.2             pheatmap_1.0.8          
+ [97] GenomicAlignments_1.14.1 AnnotationDbi_1.40.0     memoise_1.1.0           
+[100] cluster_2.0.6            brew_1.0-6               GSEABase_1.40.0         
--- a/test-data/out_binding.matrix	Tue Mar 20 04:51:25 2018 -0400
+++ b/test-data/out_binding.matrix	Sat Apr 07 15:45:41 2018 -0400
@@ -1,17 +1,18 @@
-chr18	111567	112005	29.4139375878664	19.9594576489093	137.615208000375	59.878372946728
-chr18	189223	189652	11.5554754809475	23.110950961895	19.9594576489093	12.6059732519427
-chr18	215232	216063	31.5149331298568	72.4843461986707	11.5554754809475	15.7574665649284
-chr18	311530	312172	54.6258840917518	43.0704086108043	17.8584621069189	11.5554754809475
-chr18	346464	347342	21.0099554199046	16.8079643359236	75.6358395116564	40.9694130688139
-chr18	356560	357362	57.7773774047375	53.5753863207566	11.5554754809475	14.7069687939332
-chr18	371110	372102	81.9388261376278	82.989323908623	8.40398216796182	9.45447993895705
-chr18	394600	396513	510.541916703681	438.05757050501	56.7268796337423	43.0704086108043
-chr18	399014	400382	558.864814169461	496.885445680743	156.524167878289	117.655750351465
-chr18	498906	500200	196.443083176108	181.736114382174	767.913870597511	278.381909313735
-chr18	503518	504552	194.342087634117	231.10950961895	99.7972882445466	61.9793684887184
-chr18	531672	532439	57.7773774047375	73.5348439696659	44.1209063817996	23.110950961895
-chr18	568036	569332	207.998558657055	221.655029679993	165.978647817246	157.574665649284
-chr18	589046	589875	117.655750351466	149.170683481322	50.4238930077709	72.4843461986707
+CHR	START	END	MCF7_ER_1_bed	MCF7_ER_2_bed	BT474_ER_1_bed	BT474_ER_2_bed
+chr18	111567	112005	137.615208000375	59.878372946728	29.4139375878664	19.9594576489093
+chr18	189223	189652	19.9594576489093	12.6059732519427	11.5554754809475	23.110950961895
+chr18	215232	216063	11.5554754809475	15.7574665649284	31.5149331298568	72.4843461986707
+chr18	311530	312172	17.8584621069189	11.5554754809475	54.6258840917518	43.0704086108043
+chr18	346464	347342	75.6358395116564	40.9694130688139	21.0099554199046	16.8079643359236
+chr18	356560	357362	11.5554754809475	14.7069687939332	57.7773774047375	53.5753863207566
+chr18	371110	372102	8.40398216796182	9.45447993895705	81.9388261376278	82.989323908623
+chr18	394600	396513	56.7268796337423	43.0704086108043	510.541916703681	438.05757050501
+chr18	399014	400382	156.524167878289	117.655750351465	558.864814169461	496.885445680743
+chr18	498906	500200	767.913870597511	278.381909313735	196.443083176108	181.736114382174
+chr18	503518	504552	99.7972882445466	61.9793684887184	194.342087634117	231.10950961895
+chr18	531672	532439	44.1209063817996	23.110950961895	57.7773774047375	73.5348439696659
+chr18	568036	569332	165.978647817246	157.574665649284	207.998558657055	221.655029679993
+chr18	589046	589875	50.4238930077709	72.4843461986707	117.655750351466	149.170683481322
 chr18	651923	652540	1.05049777099523	1.05049777099523	1.05049777099523	1.05049777099523
 chr18	657092	657876	1.05049777099523	1.05049777099523	1.05049777099523	1.05049777099523
 chr18	770235	770992	1.05049777099523	1.05049777099523	1.05049777099523	1.05049777099523
--- a/test-data/out_diffbind.bed	Tue Mar 20 04:51:25 2018 -0400
+++ b/test-data/out_diffbind.bed	Sat Apr 07 15:45:41 2018 -0400
@@ -1,5 +1,6 @@
-chr18	394600	396513	1914	*	7.15	7.89	5.55	2.35	7.06e-24	9.84e-21
-chr18	111567	112005	439	*	5.71	3.63	6.53	-2.89	1.27e-08	8.88e-06
-chr18	346464	347342	879	*	5	3.24	5.77	-2.52	6.51e-06	0.00303
-chr18	399014	400382	1369	*	7.62	8.05	7	1.04	1.04e-05	0.00364
-chr18	371110	372102	993	*	4.63	5.36	3.07	2.3	8.1e-05	0.0226
+seqnames	start	end	width	strand	Conc	Conc_Responsive	Conc_Resistant	Fold	p.value	FDR
+chr18	394600	396513	1914	*	7.15	5.55	7.89	-2.35	7.06e-24	9.84e-21
+chr18	111567	112005	439	*	5.71	6.53	3.63	2.89	1.27e-08	8.88e-06
+chr18	346464	347342	879	*	5	5.77	3.24	2.52	6.51e-06	0.00303
+chr18	399014	400382	1369	*	7.62	7	8.05	-1.04	1.04e-05	0.00364
+chr18	371110	372102	993	*	4.63	3.07	5.36	-2.3	8.1e-05	0.0226
Binary file test-data/out_plots.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_rscript.txt	Sat Apr 07 15:45:41 2018 -0400
@@ -0,0 +1,119 @@
+## Setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+suppressPackageStartupMessages({
+    library('getopt')
+    library('DiffBind')
+    library('rjson')
+})
+
+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
+args <- commandArgs(trailingOnly = TRUE)
+
+#get options, using the spec as defined by the enclosed list.
+#we read the options from the default: commandArgs(TRUE).
+spec = matrix(c(
+    'infile' , 'i', 1, "character",
+    'outfile' , 'o', 1, "character",
+    'scorecol', 'n', 1, "integer",
+    'lowerbetter', 'l', 1, "logical",
+    'summits', 's', 1, "integer",
+    'th', 't', 1, "double",
+    'format', 'f', 1, "character",
+    'plots' , 'p', 2, "character",
+    'bmatrix', 'b', 0, "logical",
+    "rdaOpt", "r", 0, "logical",
+    'infoOpt' , 'a', 0, "logical",
+    'verbose', 'v', 2, "integer",
+    'help' , 'h', 0, "logical"
+), byrow=TRUE, ncol=4);
+
+opt = getopt(spec);
+
+# if help was asked for print a friendly message
+# and exit with a non-zero error code
+if ( !is.null(opt$help) ) {
+    cat(getopt(spec, usage=TRUE));
+    q(status=1);
+}
+
+parser <- newJSONParser()
+parser$addData(opt$infile)
+factorList <- parser$getObject()
+filenamesIn <- unname(unlist(factorList[[1]][[2]]))
+peaks <- filenamesIn[grepl("peaks.bed", filenamesIn)]
+bams <- filenamesIn[grepl("bamreads.bam", filenamesIn)]
+ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn)]
+
+# get the group and sample id from the peaks filenames
+groups <- sapply(strsplit(peaks,"-"), `[`, 1)
+samples <- sapply(strsplit(peaks,"-"), `[`, 2)
+
+if ( length(ctrls) != 0 ) {
+    sampleTable <- data.frame(SampleID=samples,
+                        Condition=groups,
+                        bamReads=bams,
+                        bamControl=ctrls,
+                        Peaks=peaks,
+                        Tissue=samples, # using "Tissue" column to display ids as labels in PCA plot
+                        stringsAsFactors=FALSE)
+} else {
+    sampleTable <- data.frame(SampleID=samples,
+                        Replicate=samples,
+                        Condition=groups,
+                        bamReads=bams,
+                        Peaks=peaks,
+                        Tissue=samples,
+                        stringsAsFactors=FALSE)
+}
+
+sample = dba(sampleSheet=sampleTable, peakFormat='bed', scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter)
+
+if ( !is.null(opt$summits) ) {
+    sample_count = dba.count(sample, summits=opt$summits)
+} else {
+    sample_count = dba.count(sample)
+}
+
+sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2)
+sample_analyze = dba.analyze(sample_contrast)
+diff_bind = dba.report(sample_analyze, th=opt$th)
+
+# Generate plots
+if ( !is.null(opt$plots) ) {
+    pdf(opt$plots)
+    orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th)
+    dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3)
+    dba.plotMA(sample_analyze, th=opt$th)
+    dba.plotVolcano(sample_analyze, th=opt$th)
+    dba.plotBox(sample_analyze, th=opt$th)
+    dev.off()
+}
+
+# Output differential binding sites
+resSorted <- diff_bind[order(diff_bind$FDR),]
+write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE)
+
+# Output binding affinity scores
+if (!is.null(opt$bmatrix)) {
+    bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)
+    write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE)
+}
+
+# Output RData file
+if (!is.null(opt$rdaOpt)) {
+    save.image(file = "DiffBind_analysis.RData")
+}
+
+# Output analysis info
+if (!is.null(opt$infoOpt)) {
+    info <- "DiffBind_analysis_info.txt"
+    cat("dba.count Info\n\n", file=info, append = TRUE)
+    capture.output(sample, file=info, append=TRUE)
+    cat("\ndba.analyze Info\n\n", file=info, append = TRUE)
+    capture.output(sample_analyze, file=info, append=TRUE)
+    cat("\nSessionInfo\n\n", file=info, append = TRUE)
+    capture.output(sessionInfo(), file=info, append=TRUE)
+}
\ No newline at end of file