# HG changeset patch # User dongjun # Date 1316590026 14400 # Node ID b2567f7ff12f53d6ded48a958119ead9db8a0ec5 Uploaded diff -r 000000000000 -r b2567f7ff12f mosaics.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mosaics.xml Wed Sep 21 03:27:06 2011 -0400 @@ -0,0 +1,241 @@ + + + + + + + + R + + + + mosaics_wrapper.pl + ## input file name (chip and control) + $chipParams.chip + $controlParams.control + ## input file format (chip and control) + $chipParams.chipFileFormat + $controlParams.controlFileFormat + ## peak file name + $out_peak + ## peak file format + $OutfileFormat + ## analysis type + IO + ## optional output + $report_summary + $report_gof + $report_exploratory + ## settings for model fitting and peak calling: required (0.05, 200, 50) + $fdrLevel + $fragLen + $binSize + $capping + ## settings for model fitting and peak calling: optional + #if $fitParams.fSettingsType == "preSet" + BIC + 0.25 + 200 + 50 + 10 + #else + $fitParams.signalModel + $fitParams.d + $fitParams.maxgap + $fitParams.minsize + $fitParams.thres + #end if + ## Number of cores to use + 8 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + summary == 1 + + + gof == 1 + + + exploratory == 1 + + + + + +**What it does** + +MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions). +MOSAiCS is also available in Bioconductor_ as a R package. +We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_. + +Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," To appear in the *Journal of the American Statistical Association*. 
+ +.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.8/bioc/html/mosaics.html +.. _Google group: http://groups.google.com/group/mosaics_user_group +.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706 + +------ + +**Input formats** + +MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts single-end reads, in Eland result, Eland extended, Eland export, Bowtie default, and SAM formats. + +------ + +**Outputs** + +Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak. + +If the output is a table, it has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 Chromosome of the peak + 2 Start position of the peak + 3 End position of the peak + 4 Width of the peak + 5 Averaged posterior probability of the peak + 6 Minimum posterior probability of the peak + 7 Averaged ChIP tag counts of the peak + 8 Maximum ChIP tag counts of the peak + 9 Averaged control tag counts of the peak + 10 Averaged control tag counts of the peak, scaled by sequencing depth + 11 Averaged log base 2 ratio of ChIP over input tag counts + +If the output is in BED format, it has the following columns:: + + Column Description + ------------ -------------------------------------------------------- + 1 chrom Chromosome of the peak + 2 chromStart Start position of the peak + 3 chromEnd End position of the peak + 4 name Always "MOSAiCS_peak" + 5 score Averaged ChIP tag counts of the peak + +If the output is in GFF format, it has the following columns:: + + Column Description + --------- -------------------------------------------------------- + 1 seqname Chromosome of the peak + 2 source Always "MOSAiCS" + 3 feature Always "MOSAiCS_peak" + 4 start Start position of the peak + 5 end End position of the peak + 6 score Averaged 
ChIP tag counts of the peak + 7 strand Always "." + 8 frame Always "." + 9 group Always "." + +------ + +**Reports for diagnostics** + +*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and a brief summary of peak calling results. + +*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data. + +*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts. + +More details regarding these reports can be found here_. + +------ + +**Settings for model fitting and peak calling** + +More details about the tuning of these parameters can be found here_. + +.. _here: http://www.bioconductor.org/packages/2.8/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf + + + diff -r 000000000000 -r b2567f7ff12f mosaics_1.0.7.tar.gz Binary file mosaics_1.0.7.tar.gz has changed diff -r 000000000000 -r b2567f7ff12f mosaics_wrapper.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mosaics_wrapper.pl Wed Sep 21 03:27:06 2011 -0400 @@ -0,0 +1,111 @@ +# Wrapper for MOSAiCS +# Written by Dongjun Chung, Sep. 
15, 2011
+
+#!/usr/bin/env perl
+use warnings;
+use strict;
+use File::Temp qw/tempfile tempdir/;
+use File::Basename;
+use Scalar::Util qw/looks_like_number/;
+
+# Escape backslashes and double quotes for use inside a double-quoted R string.
+sub r_string {
+    my ($value) = @_;
+    $value =~ s/([\\"])/\\$1/g;
+    return $value;
+}
+
+# Turn a report path into the ( R logical, directory, file name ) triple passed
+# to mosaicsRunAll(); the path "None" means the report was not requested.
+sub report_option {
+    my ($path) = @_;
+    return ( "FALSE", "NULL", "NULL" ) if $path eq "None";
+    my ( $filename, $dir ) = fileparse($path);
+    return ( "TRUE", $dir, $filename );
+}
+
+# parse command arguments: all 20 are positional and required
+die "Usage: perl mosaics_wrapper.pl [chip_path] [control_path] [chip_file_format] [control_file_format] [peak_path] [peak_file_format] [analysis_type] [report_summary_path] [report_gof_path] [report_exploratory_path] [fdr_level] [frag_len] [bin_size] [capping] [signal_model] [d] [maxgap] [minsize] [thres] [n_core]" unless @ARGV == 20;
+my ( $chip_path, $control_path, $chip_file_format, $control_file_format, $peak_path, $peak_file_format, $analysis_type, $report_summary_path, $report_gof_path, $report_exploratory_path, $fdr_level, $frag_len, $bin_size, $capping, $signal_model, $d, $maxgap, $minsize, $thres, $n_core ) = @ARGV;
+
+# parse options: analysis type
+if ( $analysis_type ne "IO" ) {
+    print STDERR "Only 'IO' is supported for analysis type!\n";
+    exit 1;
+}
+
+# parse options: numeric settings go into the R script unquoted, so require numbers
+for my $number ( $fdr_level, $frag_len, $bin_size, $capping, $d, $maxgap, $minsize, $thres, $n_core ) {
+    die "Numeric setting expected, but got '$number'!\n" unless looks_like_number($number);
+}
+
+# parse options: ChIP, control, peak
+my ($chip_filename, $chip_dir) = fileparse($chip_path);
+my ($control_filename, $control_dir) = fileparse($control_path);
+my ($peak_filename, $peak_dir) = fileparse($peak_path);
+
+# parse options: reports (summary, GOF, exploratory analysis)
+my ( $report_summary, $summary_dir, $summary_filename ) = report_option($report_summary_path);
+my ( $report_gof, $gof_dir, $gof_filename ) = report_option($report_gof_path);
+my ( $report_exploratory, $exploratory_dir, $exploratory_filename ) = report_option($report_exploratory_path);
+
+# bin files go to a scratch directory that is removed when this wrapper exits
+my $tempdir_bin = tempdir( CLEANUP => 1 );
+
+# escape every value that is interpolated between double quotes in the R script
+$_ = r_string($_) for (
+    $chip_dir, $chip_filename, $chip_file_format,
+    $control_dir, $control_filename, $control_file_format,
+    $tempdir_bin, $peak_dir, $peak_filename, $peak_file_format,
+    $summary_dir, $summary_filename, $exploratory_dir, $exploratory_filename,
+    $gof_dir, $gof_filename, $analysis_type, $signal_model,
+);
+
+# write an R script to run
+my $cmd = qq|
+    suppressPackageStartupMessages(library(mosaics))
+    try( suppressPackageStartupMessages(library(rparallel)), silent=TRUE )
+
+    mosaicsRunAll(
+        chipDir="$chip_dir",
+        chipFileName="$chip_filename",
+        chipFileFormat="$chip_file_format",
+        controlDir="$control_dir",
+        controlFileName="$control_filename",
+        controlFileFormat="$control_file_format",
+        binfileDir="$tempdir_bin",
+        peakDir="$peak_dir",
+        peakFileName="$peak_filename",
+        peakFileFormat="$peak_file_format",
+        reportSummary=$report_summary,
+        summaryDir="$summary_dir",
+        summaryFileName="$summary_filename",
+        reportExploratory=$report_exploratory,
+        exploratoryDir="$exploratory_dir",
+        exploratoryFileName="$exploratory_filename",
+        reportGOF=$report_gof,
+        gofDir="$gof_dir",
+        gofFileName="$gof_filename",
+        FDR=$fdr_level,
+        fragLen=$frag_len,
+        binSize=$bin_size,
+        capping=$capping,
+        analysisType="$analysis_type",
+        d=$d,
+        signalModel="$signal_model",
+        maxgap=$maxgap,
+        minsize=$minsize,
+        thres=$thres,
+        nCore=$n_core )
+
+    q()
+    |;
+
+# run R via /bin/sh; "> /dev/null 2>&1" is the POSIX spelling of csh/bash ">&"
+open( my $r_pipe, "|-", "R --slave --vanilla > /dev/null 2>&1" ) or die "Couldn't call R!\n";
+print {$r_pipe} $cmd, "\n";
+close $r_pipe or die "Couldn't finish R!\n";
+
+exit 0;
+
+