# HG changeset patch # User iuc # Date 1521535885 14400 # Node ID d7725c5596ab8cd745b8105874a71dc26afeb346 # Parent 6171163112de3b428dd38b12d10cf83146453482 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit f970dcbe9d0e4c3714b1db74c404ea34223cf8ed diff -r 6171163112de -r d7725c5596ab diffbind.R --- a/diffbind.R Sun Jan 28 05:10:25 2018 -0500 +++ b/diffbind.R Tue Mar 20 04:51:25 2018 -0400 @@ -21,7 +21,8 @@ 'infile' , 'i', 1, "character", 'format', 'f', 1, "character", 'th', 't', 1, "double", - 'bmatrix', 'b', 0, "logical" + 'bmatrix', 'b', 0, "logical", + "rdaOpt", "r", 0, "logical" ), byrow=TRUE, ncol=4); opt = getopt(spec); @@ -43,6 +44,7 @@ sample_analyze = dba.analyze(sample_contrast) diff_bind = dba.report(sample_analyze) orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE) +dev.off() resSorted <- diff_bind[order(diff_bind$FDR),] write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE) @@ -53,5 +55,10 @@ write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) } -dev.off() +## Output RData file + +if (!is.null(opt$rdaOpt)) { + save.image(file = "DiffBind_analysis.RData") +} + sessionInfo() diff -r 6171163112de -r d7725c5596ab diffbind.xml --- a/diffbind.xml Sun Jan 28 05:10:25 2018 -0500 +++ b/diffbind.xml Tue Mar 20 04:51:25 2018 -0400 @@ -1,10 +1,8 @@ - + differential binding analysis of ChIP-Seq peak data - bioconductor-diffbind + bioconductor-diffbind r-getopt - - r-rmysql /dev/null | grep -v -i "WARNING: ")," getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmysql version" $(R --vanilla --slave -e "library(rmysql); cat(sessionInfo()\$otherPkgs\$rmysql\$Version)" 2> /dev/null | grep -v -i "WARNING: ") +echo $(R --version | grep version | grep -v GNU)", 
DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ") ]]> @@ -66,7 +68,7 @@ #end for]]> - + @@ -79,32 +81,41 @@ - - - - - - - + + +
+ + + + + + + + + +
+ - + - - pdf == True + + out['pdf'] - - binding_affinity_matrix == True + + out['binding_matrix'] + + + out['rdata'] + - + @@ -142,9 +153,12 @@ - + + + + -2452 7 -5.61 3.57e-10 1.02e-06 -1291 5.97 -5.75 1.1e-09 1.57e-06 -976 7.92 -4.79 1.1e-08 1.05e-05 -2338 7.77 -5.93 1.68e-08 1.17e-05 -2077 6.13 -4.23 2.36e-08 1.17e-05 + Columns contain the following data: -The value columns show the -Conc mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) -Conc_Resistant mean concentration over the first (Resistant) group -Conc_Responsive mean concentration over second (Responsive) group -Fold column shows the difference in mean concentrations between the two groups (Conc_Resistant - Conc_Responsive), with a positive value indicating increased binding affinity in the Resistant group and a negative value indicating increased binding affinity in the Responsive group. -p-value confidence measure for identifying these sites as differentially bound -FDR a multiple testing corrected FDR p-value +* **1st**: Chromosome name +* **2nd**: Start position of site +* **3rd**: End position of site +* **4th**: Length of site +* **5th**: Strand +* **6th**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) +* **7th**: Mean concentration over the first (e.g. Resistant) group +* **8th**: Mean concentration over second (e.g. Responsive) group +* **9th**: Fold shows the difference in mean concentrations between the two groups (e.g. Resistant - Responsive), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group. 
+* **10th**: P-value, a confidence measure for identifying these sites as differentially bound +* **11th**: FDR, a p-value corrected for multiple testing **Binding Affinity Matrix** @@ -315,7 +323,7 @@ ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22 ====== ====== ====== ========== ========== ========= ====== ========= ==== - +----- **More Information** @@ -328,21 +336,18 @@ #. Plotting and reporting - * **Reading in peaksets**: +**Reading in peaksets**: The first step is to read in a set of peaksets and associated -metadata. Peaksets are derived either from ChIP-Seq peak callers, such as MACS -([1]), or using some other criterion (e.g. genomic windows, or all the promoter regions -in a genome). The easiest way to read in peaksets is using a comma-separated value -(csv) sample sheet with one line for each peakset. (Spreadsheets in Excel® format, with -a .xls or .xlsx suffix, are also accepted.) A single experiment can have more than +metadata. Peaksets are derived either from ChIP-Seq peak callers, such as **MACS2**, or using some other criterion (e.g. genomic windows, or all the promoter regions +in a genome). A single experiment can have more than one associated peakset; e.g. if multiple peak callers are used for comparison purposes each sample would have more than one line in the sample sheet. Once the peaksets are read in, a merging function finds all overlapping peaks and derives a single set of unique genomic intervals covering all the supplied peaks (a consensus peakset for the experiment). - * **Occupancy analysis**: +**Occupancy analysis**: Peaksets, especially those generated by peak callers, provide an insight into the potential occupancy of the protein being ChIPed for at specific @@ -356,7 +361,7 @@ a consensus peakset, representing an overall set of candidate binding sites to be used in further analysis. 
- * **Counting reads**: +**Counting reads**: Once a consensus peakset has been derived, DiffBind can use the supplied sequence read files to count how many reads overlap each interval for each @@ -368,7 +373,7 @@ data. The binding affinity matrix is used for QC plotting as well as for subsequent differential analysis. - * **Differential binding affinity analysis**: +**Differential binding affinity analysis**: The core functionality of DiffBind is the differential binding affinity analysis, which enables binding sites to be identified that @@ -378,7 +383,7 @@ This will assign a p-value and FDR to each candidate binding site indicating confidence that they are differentially bound. - * **Plotting and reporting**: +**Plotting and reporting**: Once one or more contrasts have been run, DiffBind provides a number of functions for reporting and plotting the results. MA plots give an @@ -387,7 +392,9 @@ of reads within differentially bound sites corresponding to whether they gain or lose affinity between the two sample groups. A reporting mechanism enables differentially bound sites to be extracted for further processing, such as annotation, motif, and -pathway analyses. +pathway analyses. *Note that currently only the correlation plot is implemented in this Galaxy tool.* + +----- **References** diff -r 6171163112de -r d7725c5596ab test-data/DiffBind_analysis.RData Binary file test-data/DiffBind_analysis.RData has changed diff -r 6171163112de -r d7725c5596ab test-data/out_plots.pdf Binary file test-data/out_plots.pdf has changed