Mercurial > repos > dongjun > mosaics
changeset 1:f0b6fb422967 draft
Deleted selected files
author | dongjun |
---|---|
date | Thu, 10 Jan 2013 15:55:39 -0500 |
parents | b2567f7ff12f |
children | b6d0c6ceda2c |
files | mosaics.xml mosaics_1.0.7.tar.gz mosaics_wrapper.pl |
diffstat | 3 files changed, 0 insertions(+), 352 deletions(-) [+] |
line wrap: on
line diff
<tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="1.0.0">

  <!--
    Galaxy tool definition for MOSAiCS peak calling.
    The command section passes 20 positional arguments to mosaics_wrapper.pl
    in a fixed order; the wrapper rejects any other argument count, so keep
    the order and the number of values below in sync with the wrapper.
  -->

  <description></description>

  <parallelism method="basic"></parallelism>

  <requirements>
    <requirement type="binary">R</requirement>
  </requirements>

  <command interpreter="perl">
    mosaics_wrapper.pl
    ## input file name (chip and control)
    $chipParams.chip
    $controlParams.control
    ## input file format (chip and control)
    $chipParams.chipFileFormat
    $controlParams.controlFileFormat
    ## peak file name
    $out_peak
    ## peak file format
    $OutfileFormat
    ## analysis type (the wrapper only supports IO)
    IO
    ## optional output
    $report_summary
    $report_gof
    $report_exploratory
    ## settings for model fitting and peak calling: required
    ## (form defaults: FDR 0.05, fragLen 200, binSize 200, capping 3)
    $fdrLevel
    $fragLen
    $binSize
    $capping
    ## settings for model fitting and peak calling: optional
    ## (the preSet values mirror the defaults of the full parameter list)
    #if $fitParams.fSettingsType == "preSet"
      BIC
      0.25
      200
      50
      10
    #else
      $fitParams.signalModel
      $fitParams.d
      $fitParams.maxgap
      $fitParams.minsize
      $fitParams.thres
    #end if
    ## Number of cores to use: the slots Galaxy allocated to the job when
    ## that variable is set, otherwise the previous fixed value of 8
    \${GALAXY_SLOTS:-8}
  </command>

  <inputs>
    <conditional name="chipParams">
      <param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS can accept aligned read files.">
        <option value="eland_result">Eland result</option>
        <option value="eland_extended">Eland extended</option>
        <option value="eland_export">Eland export</option>
        <option value="bowtie">Bowtie default</option>
        <option value="sam">SAM</option>
      </param>
      <when value="eland_result">
        <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
      </when>
      <when value="eland_extended">
        <param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
      </when>
      <when value="eland_export">
        <param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
      </when>
      <when value="bowtie">
        <param name="chip" type="data" label="Bowtie default file for ChIP sample"/>
      </when>
      <when value="sam">
        <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
      </when>
    </conditional> <!-- chipParams -->
    <conditional name="controlParams">
      <param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS can accept aligned read files.">
        <option value="eland_result">Eland result</option>
        <option value="eland_extended">Eland extended</option>
        <option value="eland_export">Eland export</option>
        <option value="bowtie">Bowtie default</option>
        <option value="sam">SAM</option>
      </param>
      <when value="eland_result">
        <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
      </when>
      <when value="eland_extended">
        <param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
      </when>
      <when value="eland_export">
        <param name="control" type="data" format="eland" label="Eland export file for control sample"/>
      </when>
      <when value="bowtie">
        <param name="control" type="data" label="Bowtie default file for control sample"/>
      </when>
      <when value="sam">
        <param name="control" type="data" format="sam" label="SAM file for control sample"/>
      </when>
    </conditional> <!-- controlParams -->

    <param name="OutfileFormat" type="select" label="Select file format for peak calling results" help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table.">
      <option value="bed">BED</option>
      <option value="gff">GFF</option>
      <option value="txt">table</option>
    </param>
    <param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Summary of model fitting and peak calling" />
    <param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Goodness of fit (GOF) plots" />
    <param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Plots of exploratory analysis" />

    <param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" />
    <param name="fragLen" type="integer" value="200" label="Average fragment length" help="Default: 200." />
    <param name="binSize" type="integer" value="200" label="Bin size" help="By default, the bin size equals the average fragment length." />
    <param name="capping" type="integer" value="3" label="Maximum number of reads allowed to start at each nucleotide position" help="Small values (e.g., 3) are recommended for ChIP-seq data with low sequencing depth and large values (e.g., 10000) for ChIP-seq data with high sequencing depth." />

    <conditional name="fitParams">
      <param name="fSettingsType" type="select" label="Settings for model fitting and peak calling" help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
        <option value="preSet">Commonly used</option>
        <option value="full">Full parameter list</option>
      </param>
      <when value="preSet" />
      <when value="full">
        <param name="signalModel" type="select" label="Signal model" help="By default, the signal model is chosen using BIC. Alternatively, the user can select the one- or two-signal-component model.">
          <option value="BIC">Automatic model selection based on BIC</option>
          <option value="1S">One-signal-component model</option>
          <option value="2S">Two-signal-component model</option>
        </param>
        <param name="d" type="float" value="0.25" label="d" help="Parameter for estimating background distribution. Default is 0.25." />
        <param name="maxgap" type="integer" value="200" label="maxgap" help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
        <param name="minsize" type="integer" value="50" label="minsize" help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
        <param name="thres" type="integer" value="10" label="thres" help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
      </when> <!-- full -->
    </conditional> <!-- fitParams -->
  </inputs>

  <outputs>
    <data format="tabular" name="out_peak">
      <change_format>
        <when input="OutfileFormat" value="bed" format="bed" />
        <when input="OutfileFormat" value="gff" format="gff" />
      </change_format>
    </data>
    <!-- The three reports are only created when the matching checkbox is ticked. -->
    <data format="txt" name="report_summary">
      <filter>summary == 1</filter>
    </data>
    <data format="pdf" name="report_gof">
      <filter>gof == 1</filter>
    </data>
    <data format="pdf" name="report_exploratory">
      <filter>exploratory == 1</filter>
    </data>
  </outputs>

  <help>

**What it does**

MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
MOSAiCS is also available in Bioconductor_ as an R package.
We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.

Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," To appear in the *Journal of the American Statistical Association*.

.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.8/bioc/html/mosaics.html
.. _Google group: http://groups.google.com/group/mosaics_user_group
.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706

------

**Input formats**

MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts single-end reads, in Eland result, Eland extended, Eland export, Bowtie default, and SAM formats.

------

**Outputs**

Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.

If the output is a table, it has the following columns::

    Column      Description
    --------    --------------------------------------------------------
    1           Chromosome of the peak
    2           Start position of the peak
    3           End position of the peak
    4           Width of the peak
    5           Averaged posterior probability of the peak
    6           Minimum posterior probability of the peak
    7           Averaged ChIP tag counts of the peak
    8           Maximum ChIP tag counts of the peak
    9           Averaged control tag counts of the peak
    10          Averaged control tag counts of the peak, scaled by sequencing depth
    11          Averaged log base 2 ratio of ChIP over input tag counts

If the output is in BED format, it has the following columns::

    Column          Description
    ------------    --------------------------------------------------------
    1 chrom         Chromosome of the peak
    2 chromStart    Start position of the peak
    3 chromEnd      End position of the peak
    4 name          Always "MOSAiCS_peak"
    5 score         Averaged ChIP tag counts of the peak

If the output is in GFF format, it has the following columns::

    Column       Description
    ---------    --------------------------------------------------------
    1 seqname    Chromosome of the peak
    2 source     Always "MOSAiCS"
    3 feature    Always "MOSAiCS_peak"
    4 start      Start position of the peak
    5 end        End position of the peak
    6 score      Averaged ChIP tag counts of the peak
    7 strand     Always "."
    8 frame      Always "."
    9 group      Always "."

------

**Reports for diagnostics**

*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and a brief summary of peak calling results.

*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data.

*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.

More details regarding these reports can be found here_.

------

**Settings for model fitting and peak calling**

More details about the tuning of these parameters can be found here_.

.. _here: http://www.bioconductor.org/packages/2.8/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf

  </help>
</tool>
#!/usr/bin/env perl
# Wrapper for MOSAiCS
# Written by Dongjun Chung, Sep. 15, 2011
#
# mosaics_wrapper.pl
#
# Takes exactly 20 positional arguments (listed in the usage message below),
# turns them into a call to mosaicsRunAll() from the R package 'mosaics', and
# feeds the generated script to R on standard input.  A report path equal to
# the literal string "None" means that the corresponding report is skipped.
#
# Exit status: non-zero when the arguments are invalid, when R cannot be
# started, or when R terminates with an error.

use warnings;
use strict;
use File::Temp qw/tempfile tempdir/;
use File::Basename;

# Pattern for a plain decimal number (optional sign, fraction and exponent).
# Numeric arguments are pasted unquoted into the R script, so anything that
# does not match is rejected instead of being handed to R as code.
my $NUMBER_RE = qr/\A[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\z/;

# Return $value as a double-quoted R string literal.  Backslashes, double
# quotes and line breaks are escaped so that the value can neither terminate
# the literal early nor be misread as an R escape sequence.
sub _r_string {
    my ($value) = @_;
    $value =~ s/([\\"])/\\$1/g;
    $value =~ s/\n/\\n/g;
    $value =~ s/\r/\\r/g;
    return qq{"$value"};
}

# Translate one optional report path into ( R logical, directory, file name ).
# The path "None" yields ( "FALSE", "NULL", "NULL" ), i.e. report disabled.
sub _report_option {
    my ($path) = @_;
    return ( 'FALSE', 'NULL', 'NULL' ) if $path eq 'None';
    my ( $filename, $dir ) = fileparse($path);
    return ( 'TRUE', $dir, $filename );
}

# parse command arguments

die "Usage: perl mosaics_wrapper.pl [chip_path] [control_path] [chip_file_format] [control_file_format] [peak_path] [peak_file_format] [analysis_type] [report_summary_path] [report_gof_path] [report_exploratory_path] [fdr_level] [frag_len] [bin_size] [capping] [signal_model] [d] [maxgap] [minsize] [thres] [n_core]" unless @ARGV == 20;

my ( $chip_path, $control_path, $chip_file_format, $control_file_format, $peak_path, $peak_file_format, $analysis_type, $report_summary_path, $report_gof_path, $report_exploratory_path, $fdr_level, $frag_len, $bin_size, $capping, $signal_model, $d, $maxgap, $minsize, $thres, $n_core ) = @ARGV;

# parse options: analysis type

if ( $analysis_type ne "IO" ) {
    # Diagnostics belong on STDERR so that they are not mistaken for output.
    print STDERR "Only 'IO' is supported for analysis type!\n";
    exit 1;
}

# parse options: numeric settings

my %numeric_arg = (
    fdr_level => $fdr_level,
    frag_len  => $frag_len,
    bin_size  => $bin_size,
    capping   => $capping,
    d         => $d,
    maxgap    => $maxgap,
    minsize   => $minsize,
    thres     => $thres,
    n_core    => $n_core,
);
for my $name ( sort keys %numeric_arg ) {
    die "Argument [$name] must be a number, got '$numeric_arg{$name}'\n"
        unless $numeric_arg{$name} =~ $NUMBER_RE;
}

# parse options: ChIP, control, peak

my ( $chip_filename,    $chip_dir )    = fileparse($chip_path);
my ( $control_filename, $control_dir ) = fileparse($control_path);
my ( $peak_filename,    $peak_dir )    = fileparse($peak_path);

# parse options: optional reports (summary, GOF, exploratory analysis)

my ( $report_summary, $summary_dir, $summary_filename )
    = _report_option($report_summary_path);
my ( $report_gof, $gof_dir, $gof_filename )
    = _report_option($report_gof_path);
my ( $report_exploratory, $exploratory_dir, $exploratory_filename )
    = _report_option($report_exploratory_path);

# write an R script to run

# Scratch directory for the intermediate bin-level files.  Nothing outside
# this script knows its name, so it is removed when the script exits.
my $tempdir_bin = tempdir( CLEANUP => 1 );

# Every value that ends up inside an R string literal, already quoted.
my %raw_string = (
    chip_dir             => $chip_dir,
    chip_filename        => $chip_filename,
    chip_file_format     => $chip_file_format,
    control_dir          => $control_dir,
    control_filename     => $control_filename,
    control_file_format  => $control_file_format,
    tempdir_bin          => $tempdir_bin,
    peak_dir             => $peak_dir,
    peak_filename        => $peak_filename,
    peak_file_format     => $peak_file_format,
    summary_dir          => $summary_dir,
    summary_filename     => $summary_filename,
    exploratory_dir      => $exploratory_dir,
    exploratory_filename => $exploratory_filename,
    gof_dir              => $gof_dir,
    gof_filename         => $gof_filename,
    analysis_type        => $analysis_type,
    signal_model         => $signal_model,
);
my %q = map { $_ => _r_string( $raw_string{$_} ) } keys %raw_string;

my $cmd = <<"END_R_SCRIPT";
suppressPackageStartupMessages(library(mosaics))
try( suppressPackageStartupMessages(library(rparallel)), silent=TRUE )

mosaicsRunAll(
    chipDir=$q{chip_dir},
    chipFileName=$q{chip_filename},
    chipFileFormat=$q{chip_file_format},
    controlDir=$q{control_dir},
    controlFileName=$q{control_filename},
    controlFileFormat=$q{control_file_format},
    binfileDir=$q{tempdir_bin},
    peakDir=$q{peak_dir},
    peakFileName=$q{peak_filename},
    peakFileFormat=$q{peak_file_format},
    reportSummary=$report_summary,
    summaryDir=$q{summary_dir},
    summaryFileName=$q{summary_filename},
    reportExploratory=$report_exploratory,
    exploratoryDir=$q{exploratory_dir},
    exploratoryFileName=$q{exploratory_filename},
    reportGOF=$report_gof,
    gofDir=$q{gof_dir},
    gofFileName=$q{gof_filename},
    FDR=$fdr_level,
    fragLen=$frag_len,
    binSize=$bin_size,
    capping=$capping,
    analysisType=$q{analysis_type},
    d=$d,
    signalModel=$q{signal_model},
    maxgap=$maxgap,
    minsize=$minsize,
    thres=$thres,
    nCore=$n_core )

q()
END_R_SCRIPT

# run R

# The command string is a constant (no user input), so going through the
# shell is safe here.  "> /dev/null 2>&1" is the POSIX spelling for discarding
# both output streams; the csh-style ">&" is not understood by every /bin/sh.
open( my $r_fh, '|-', 'R --slave --vanilla > /dev/null 2>&1' )
    or die "Couldn't call R!\n";
print {$r_fh} $cmd, "\n";

# close() on a pipe waits for R and reports a non-zero exit status as failure.
close $r_fh or die "Couldn't finish R!\n";

exit;