changeset 12:fa56d93f7980 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 11f68fe2b872f5abc5b660adb10336b0955fa0ee
author iuc
date Thu, 19 Apr 2018 17:15:53 -0400
parents 4c7ab9995f9e
children 1de83981d43c
files diffbind.xml test-data/out_diffbind.bed test-data/out_diffbind.tab
diffstat 3 files changed, 82 insertions(+), 86 deletions(-) [+]
line wrap: on
line diff
--- a/diffbind.xml	Sat Apr 07 15:45:41 2018 -0400
+++ b/diffbind.xml	Thu Apr 19 17:15:53 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="diffbind" name="DiffBind" version="2.6.6.1">
+<tool id="diffbind" name="DiffBind" version="2.6.6.2">
     <description> differential binding analysis of ChIP-Seq peak data</description>
     <requirements>
         <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement>
@@ -17,7 +17,7 @@
         <regex match="Error in"
            source="both"
            level="fatal"
-           description="An undefined error occured, please check your intput carefully and contact your administrator." />
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
     </stdio>
     <version_command><![CDATA[
 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
@@ -65,7 +65,7 @@
 #end for
 
 $temp_factor.reverse()
-$temp_factor_names.append([str($factorName), $temp_factor])
+$temp_factor_names.append(["Condition", $temp_factor])
 
 
 Rscript '$__tool_directory__/diffbind.R'
@@ -104,26 +104,30 @@
 ]]>
     </command>
     <inputs>
-        <param name="factorName" type="text" label="Name" help="Name of experiment factor of interest (e.g. Condition). One factor must be entered and there must be two or more groups. NOTE: Please only use letters, numbers or underscores.">
-        <sanitizer>
-            <valid initial="string.letters,string.digits"><add value="_" /></valid>
-        </sanitizer>
-        </param>
-        <repeat name="rep_group" title="Group" min="2" default="2">
+        <repeat name="rep_group" title="Group" min="2" max="2" default="2">
             <param name="groupName" type="text" label="Name"
-            help="Name of group that the peak files belong to (e.g. Resistant or Responsive). NOTE: Please only use letters, numbers or underscores (case sensitive).">
-            <sanitizer>
-                <valid initial="string.letters,string.digits"><add value="_" /></valid>
-            </sanitizer>
+            help="Name for the Group that the peak and BAM files belong to e.g. Resistant/Responsive (two Groups in total must be specified for DiffBind). NOTE: Please only use letters, numbers or underscores.">
+                <sanitizer>
+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                </sanitizer>
+                <validator type="empty_field" />
             </param>
             <param name="peaks" type="data" format="bed" multiple="true" label="Peak files" help="Result of your Peak calling experiment"/>
-            <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM file" help="Specify the Read BAM file used for Peak calling."/>
-            <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM file" help="If specifying a control BAM file, all samples are required to specify one."/>
+            <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM file" help="Specify the Read BAM file used in the Peak calling."/>
+            <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM file" help="If specifying a control BAM file, all samples are required to specify one, see Help section below."/>
         </repeat>
 
-        <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores.  Default: 8 (narrowPeak)"/>
+        <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)">
+            <sanitizer>
+                <valid initial="string.digits"/>
+            </sanitizer>
+        </param>
         <param name="lowerbetter" type="boolean" truevalue="True" falsevalue="" checked="False" label="Lower score is better?" help="DiffBind by default assumes that a higher score indicates a better peak, for example narrowPeaks -log10pvalue. If this is not the case, for example if the score is a p-value or FDR, set this option to Yes. Default: No" />
-        <param name="summits" type="integer" min="0" optional="True" label="Summits" help="Extend peaks Nbp up- and downstream of the summit. For punctate peaks it is advisable to extend (e.g. 250bp), see the DiffBind User Guide"/>
+        <param name="summits" type="integer" min="0" optional="True" label="Summits" help="Extend peaks Nbp up- and downstream of the summit. For punctate peaks it is advisable to extend (e.g. 250bp), see the DiffBind User Guide">
+            <sanitizer>
+                <valid initial="string.digits"/>
+            </sanitizer>
+        </param>
         <param name="th" type="float" value="0.05" min="0" max="1" label="FDR Threshold" help="Significance threshold; all sites with FDR less than or equal to this value will be included in the output. A value of 1 will output all binding sites. Default: 0.05"/>
 
         <!-- Output Options -->
@@ -142,12 +146,7 @@
     </inputs>
 
     <outputs>
-        <data name="outfile" format="bed" label="${tool.name} on ${on_string}: Differentially bound sites">
-            <change_format>
-                <when input="format" value="wig" format="wig" />
-                <when input="format" value="gff" format="gff" />
-            </change_format>
-        </data>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}: Differentially bound sites" />
         <data name="plots" format="pdf" label="${tool.name} on ${on_string}: Plots">
             <filter>out['pdf']</filter>
         </data>
@@ -167,7 +166,6 @@
 
     <tests>
         <test expect_num_outputs="6">
-            <param name="factorName" value="Condition"/>
             <repeat name="rep_group">
                 <param name="groupName" value="Resistant"/>
                 <param name="peaks" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
@@ -184,7 +182,7 @@
             <param name="rdata" value="True" />
             <param name="rscript" value="True"/>
             <param name="analysis_info" value="True"/>
-            <output name="outfile" value="out_diffbind.bed" />
+            <output name="outfile" value="out_diffbind.tab" />
             <output name="plots" value="out_plots.pdf" compare="sim_size" />
             <output name="binding_matrix" value="out_binding.matrix" />
             <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/>
@@ -235,18 +233,18 @@
 candidate protein binding sites. Each interval consists of a chromosome, a start and end
 position, and usually a score of some type indicating confidence in, or strength of, the peak.
 Associated with each peakset are metadata relating to the experiment from which the peakset
-was derived. Additionally, files containing mapped sequencing reads (generally .bam files) can
+was derived. Additionally, files containing mapped sequencing reads (BAM files) need to
 be associated with each peakset (one for the ChIP data, and optionally another representing
 a control sample)
 
-**Sample Information**
+**Groups**
 
-You have to specify your sample information in the tool form above, where Factor is the groups you want to compare (e.g Resistant and Responsive).
+You have to specify the name of the Group and the peak and BAM files for the two Groups you want to compare (e.g. Resistant and Responsive) in the tool form above.
 
 Example:
 
     ============= =============
-     **SampleID** **Group**
+     **Sample**   **Group**
     ------------- -------------
     BT4741        Resistant
     BT4742        Resistant
@@ -259,21 +257,21 @@
 
 Result of your Peak calling experiment in bed format, one file for each sample is required. The peak caller, format and score column can be specified in the tool form above. The default settings expect narrowPeak bed format, which has the score in the 8th column (-log10pvalue), and can be output from MACS2.
 
-Example (MACS.xls file in bed format):
+Example:
 
     ======= ======= ======= =============== ==============
     1          2      3          4           **5 (Score)**
     ======= ======= ======= =============== ==============
-    chr18   215562  216063  MACS_peak_16037 56.11
-    chr18   311530  312105  MACS_peak_16038 222.49
-    chr18   356656  357315  MACS_peak_16039 92.06
-    chr18   371110  372092  MACS_peak_16040 123.86
-    chr18   395116  396464  MACS_peak_16041 1545.39
-    chr18   399014  400382  MACS_peak_16042 1835.19
-    chr18   499134  500200  MACS_peak_16043 748.32
-    chr18   503518  504552  MACS_peak_16044 818.30
-    chr18   531672  532274  MACS_peak_16045 159.30
-    chr18   568326  569282  MACS_peak_16046 601.11
+    chr18   215562  216063  peak_16037      56.11
+    chr18   311530  312105  peak_16038      222.49
+    chr18   356656  357315  peak_16039      92.06
+    chr18   371110  372092  peak_16040      123.86
+    chr18   395116  396464  peak_16041      1545.39
+    chr18   399014  400382  peak_16042      1835.19
+    chr18   499134  500200  peak_16043      748.32
+    chr18   503518  504552  peak_16044      818.30
+    chr18   531672  532274  peak_16045      159.30
+    chr18   568326  569282  peak_16046      601.11
     ======= ======= ======= =============== ==============
 
 * BAM file which contains the mapped sequencing reads associated with each peakset, one file for each sample is required.
@@ -285,7 +283,7 @@
 
 This tool outputs
 
-    * differentially bound sites in BED, WIG or GFF format
+    * a table of differentially bound sites
 
 Optionally, under **Output Options** you can choose to output
 
@@ -297,33 +295,31 @@
 
 **Differentially Bound Sites**
 
-As output format you can choose BED, GFF, WIG.
-
-Example - BED format:
+Example:
 
-    ========  ======  ======  =====  ======  =====  ===============  ==============  =======   ========  ========
-    seqnames  start   end     width  strand  Conc   Conc_Responsive  Conc_Resistant  Fold      p.value   **FDR**
-    ========  ======  ======  =====  ======  =====  ===============  ==============  =======   ========  ========
-    chr18     394600  396513  1914    *      7.15   5.55             7.89            -2.35     7.06e-24  9.84e-21
-    chr18     111567  112005  439     *      5.71   6.53             3.63            2.89      1.27e-08  8.88e-06
-    chr18     346464  347342  879     *      5      5.77             3.24            2.52      6.51e-06  0.00303
-    chr18     399014  400382  1369    *      7.62   7                8.05            -1.04     1.04e-05  0.00364
-    chr18     371110  372102  993     *      4.63   3.07             5.36            -2.3      8.1e-05   0.0226
-    ========  ======  ======  =====  ======  =====  ===============  ==============  =======   ========  ========
+    ========  ======  ======  =====  ======  =====  ===============  ==============  ======  ========  ========
+    seqnames  start   end     width  strand  Conc   Conc_Responsive  Conc_Resistant  Fold    p.value   **FDR**
+    ========  ======  ======  =====  ======  =====  ===============  ==============  ======  ========  ========
+    chr18     394600  396513  1914    \*     7.15   5.55             7.89            -2.35   7.06e-24  9.84e-21
+    chr18     111567  112005  439     \*     5.71   6.53             3.63            2.89    1.27e-08  8.88e-06
+    chr18     346464  347342  879     \*     5      5.77             3.24            2.52    6.51e-06  0.00303
+    chr18     399014  400382  1369    \*     7.62   7                8.05            -1.04   1.04e-05  0.00364
+    chr18     371110  372102  993     \*     4.63   3.07             5.36            -2.3    8.1e-05   0.0226
+    ========  ======  ======  =====  ======  =====  ===============  ==============  ======  ========  ========
 
-    Columns contain the following data:
+Columns contain the following data:
 
-* **1st**: Chromosome name
-* **2nd**: Start position of site
-* **3rd**: End position of site
-* **4th**: Length of site
-* **5th**: Strand
-* **6th**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted)
-* **7th**: Mean concentration over the first (e.g. Resistant) group
-* **8th**: Mean concentration over second (e.g. Responsive) group
-* **9th**: Fold shows the difference in mean concentrations between the two groups (e.g. Resistant - Responsive), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group.
-* **10th**: P-value confidence measure for identifying these sites as differentially bound
-* **11th**: a multiple testing corrected FDR p-value
+* **seqnames**: Chromosome name
+* **start**: Start position of site
+* **end**: End position of site
+* **width**: Length of site
+* **strand**: Strand
+* **Conc**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted)
+* **Conc_Responsive**: Mean concentration over the first (e.g. Responsive) group
+* **Conc_Resistant**: Mean concentration over the second (e.g. Resistant) group
+* **Fold**: Fold shows the difference in mean concentrations between the two groups (e.g. Responsive - Resistant), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group.
+* **p.value**: P-value confidence measure for identifying these sites as differentially bound
+* **FDR**: a multiple testing corrected FDR p-value
 
 
 **Binding Affinity Matrix**
@@ -333,20 +329,20 @@
 
 Example:
 
-    =====  ======  ======  ================  ================  ================  ================
-    CHR    START   END     MCF7_ER_1.bed     MCF7_ER_2.bed     BT474_ER_1.bed    BT474_ER_2.bed
-    =====  ======  ======  ================  ================  ================  ================
-    chr18  111567  112005  137.615208000375  59.878372946728   29.4139375878664  19.9594576489093
-    chr18  189223  189652  19.9594576489093  12.6059732519427  11.5554754809475  23.110950961895
-    chr18  215232  216063  11.5554754809475  15.7574665649284  31.5149331298568  72.4843461986707
-    chr18  311530  312172  17.8584621069189  11.5554754809475  54.6258840917518  43.0704086108043
-    chr18  346464  347342  75.6358395116564  40.9694130688139  21.0099554199046  16.8079643359236
-    chr18  356560  357362  11.5554754809475  14.7069687939332  57.7773774047375  53.5753863207566
-    chr18  371110  372102  8.40398216796182  9.45447993895705  81.9388261376278  82.989323908623
-    chr18  394600  396513  56.7268796337423  43.0704086108043  510.541916703681  438.05757050501
-    chr18  399014  400382  156.524167878289  117.655750351465  558.864814169461  496.885445680743
-    chr18  498906  500200  767.913870597511  278.381909313735  196.443083176108  181.736114382174
-    =====  ======  ======  ================  ================  ================  ================
+    =====  ======  ======  =========  =========  ==========  ==========
+    CHR    START   END     MCF7_ER_1  MCF7_ER_2  BT474_ER_1  BT474_ER_2
+    =====  ======  ======  =========  =========  ==========  ==========
+    chr18  111567  112005  137.6152   59.87837   29.41393    19.95945
+    chr18  189223  189652  19.95945   12.60597   11.55547    23.11095
+    chr18  215232  216063  11.55547   15.75746   31.51493    72.48434
+    chr18  311530  312172  17.85846   11.55547   54.62588    43.07040
+    chr18  346464  347342  75.63583   40.96941   21.00995    16.80796
+    chr18  356560  357362  11.55547   14.70696   57.77737    53.57538
+    chr18  371110  372102  8.403982   9.454479   81.93882    82.98932
+    chr18  394600  396513  56.72687   43.07040   510.5419    438.0575
+    chr18  399014  400382  156.5241   117.6557   558.8648    496.8854
+    chr18  498906  500200  767.9138   278.3819   196.4430    181.7361
+    =====  ======  ======  =========  =========  ==========  ==========
 
 -----
 
@@ -404,7 +400,7 @@
 differential binding affinity analysis, which enables binding sites to be identified that
 are statistically significantly differentially bound between sample groups. To accomplish
 this, first a contrast (or contrasts) is established, dividing the samples into groups to
-be compared. Next the core analysis routines are executed, by default using DESeq2 .
+be compared. Next the core analysis routines are executed, by default using DESeq2.
 This will assign a p-value and FDR to each candidate binding site indicating confidence
 that they are differentially bound.
 
--- a/test-data/out_diffbind.bed	Sat Apr 07 15:45:41 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-seqnames	start	end	width	strand	Conc	Conc_Responsive	Conc_Resistant	Fold	p.value	FDR
-chr18	394600	396513	1914	*	7.15	5.55	7.89	-2.35	7.06e-24	9.84e-21
-chr18	111567	112005	439	*	5.71	6.53	3.63	2.89	1.27e-08	8.88e-06
-chr18	346464	347342	879	*	5	5.77	3.24	2.52	6.51e-06	0.00303
-chr18	399014	400382	1369	*	7.62	7	8.05	-1.04	1.04e-05	0.00364
-chr18	371110	372102	993	*	4.63	3.07	5.36	-2.3	8.1e-05	0.0226
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_diffbind.tab	Thu Apr 19 17:15:53 2018 -0400
@@ -0,0 +1,6 @@
+seqnames	start	end	width	strand	Conc	Conc_Responsive	Conc_Resistant	Fold	p.value	FDR
+chr18	394600	396513	1914	*	7.15	5.55	7.89	-2.35	7.06e-24	9.84e-21
+chr18	111567	112005	439	*	5.71	6.53	3.63	2.89	1.27e-08	8.88e-06
+chr18	346464	347342	879	*	5	5.77	3.24	2.52	6.51e-06	0.00303
+chr18	399014	400382	1369	*	7.62	7	8.05	-1.04	1.04e-05	0.00364
+chr18	371110	372102	993	*	4.63	3.07	5.36	-2.3	8.1e-05	0.0226