Mercurial > repos > dongjun > mosaics
diff mosaics.xml @ 0:b2567f7ff12f
Uploaded
author | dongjun |
---|---|
date | Wed, 21 Sep 2011 03:27:06 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
  Galaxy tool definition for MOSAiCS, a peak caller for ChIP-seq data.

  Every setting is handed to mosaics_wrapper.pl as a bare positional argument
  (no flags), so the ORDER of the lines inside <command> is significant and
  must not be changed independently of the wrapper script.
-->
<tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="1.0.0">

  <description></description>

  <parallelism method="basic"></parallelism>

  <requirements>
    <requirement type="binary">R</requirement>
  </requirements>

  <!-- Cheetah template; lines starting with "##" are template comments and are not emitted. -->
  <command interpreter="perl">
    mosaics_wrapper.pl
      ## input file name (chip and control)
      $chipParams.chip
      $controlParams.control
      ## input file format (chip and control)
      $chipParams.chipFileFormat
      $controlParams.controlFileFormat
      ## peak file name
      $out_peak
      ## peak file format
      $OutfileFormat
      ## analysis type
      IO
      ## optional output
      $report_summary
      $report_gof
      $report_exploratory
      ## settings for model fitting and peak calling: required
      ## (FDR level, average fragment length, bin size, capping)
      $fdrLevel
      $fragLen
      $binSize
      $capping
      ## settings for model fitting and peak calling: optional
      ## (signal model, d, maxgap, minsize, thres)
      #if $fitParams.fSettingsType == "preSet"
        ## "Commonly used": same values as the defaults of the full parameter list
        BIC
        0.25
        200
        50
        10
      #else
        $fitParams.signalModel
        $fitParams.d
        $fitParams.maxgap
        $fitParams.minsize
        $fitParams.thres
      #end if
      ## Number of cores to use: honour the slots allocated by the job runner,
      ## falling back to the previous fixed value of 8 when GALAXY_SLOTS is unset.
      \${GALAXY_SLOTS:-8}
  </command>

  <inputs>
    <!-- ChIP sample: the selected format decides which dataset types are offered. -->
    <conditional name="chipParams">
      <param name="chipFileFormat" type="select"
             label="Select file format for ChIP sample"
             help="MOSAiCS can accept aligned read files.">
        <option value="eland_result">Eland result</option>
        <option value="eland_extended">Eland extended</option>
        <option value="eland_export">Eland export</option>
        <option value="bowtie">Bowtie default</option>
        <option value="sam">SAM</option>
      </param>
      <when value="eland_result">
        <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
      </when>
      <when value="eland_extended">
        <param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
      </when>
      <when value="eland_export">
        <param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
      </when>
      <when value="bowtie">
        <!-- NOTE(review): no format restriction here, so any dataset can be selected;
             presumably because there is no dedicated datatype for Bowtie default output. Confirm. -->
        <param name="chip" type="data" label="Bowtie default file for ChIP sample"/>
      </when>
      <when value="sam">
        <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
      </when>
    </conditional> <!-- chipParams -->

    <!-- Control sample: mirrors the ChIP sample block above. -->
    <conditional name="controlParams">
      <param name="controlFileFormat" type="select"
             label="Select file format for control sample"
             help="MOSAiCS can accept aligned read files.">
        <option value="eland_result">Eland result</option>
        <option value="eland_extended">Eland extended</option>
        <option value="eland_export">Eland export</option>
        <option value="bowtie">Bowtie default</option>
        <option value="sam">SAM</option>
      </param>
      <when value="eland_result">
        <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
      </when>
      <when value="eland_extended">
        <param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
      </when>
      <when value="eland_export">
        <param name="control" type="data" format="eland" label="Eland export file for control sample"/>
      </when>
      <when value="bowtie">
        <!-- NOTE(review): no format restriction, same as the ChIP Bowtie input. Confirm. -->
        <param name="control" type="data" label="Bowtie default file for control sample"/>
      </when>
      <when value="sam">
        <param name="control" type="data" format="sam" label="SAM file for control sample"/>
      </when>
    </conditional> <!-- controlParams -->

    <!-- Output format; also drives <change_format> of out_peak in <outputs>. -->
    <param name="OutfileFormat" type="select"
           label="Select file format for peak calling results"
           help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table.">
      <option value="bed">BED</option>
      <option value="gff">GFF</option>
      <option value="txt">table</option>
    </param>

    <!-- Optional diagnostic reports; each checkbox enables the matching output via its <filter>. -->
    <param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes"
           label="Reports for diagnostics: Summary of model fitting and peak calling" />
    <param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes"
           label="Reports for diagnostics: Goodness of fit (GOF) plots" />
    <param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes"
           label="Reports for diagnostics: Plots of exploratory analysis" />

    <!-- Required settings, always passed to the wrapper. -->
    <param name="fdrLevel" type="float" value="0.05" min="0" max="1"
           label="False discovery rate (FDR)"
           help="FDR level for peak detection (default: 0.05)" />
    <param name="fragLen" type="integer" value="200"
           label="Average fragment length"
           help="Default: 200." />
    <param name="binSize" type="integer" value="200"
           label="Bin size"
           help="By default, bin size equals the average fragment length." />
    <param name="capping" type="integer" value="3"
           label="Maximum number of reads allowed to start at each nucleotide position"
           help="Small values (e.g., 3) are recommended for ChIP-seq data with low sequencing depth and large values (e.g., 10000) for ChIP-seq data with high sequencing depth." />

    <!-- Optional settings: "preSet" makes <command> emit fixed values instead of these params. -->
    <conditional name="fitParams">
      <param name="fSettingsType" type="select"
             label="Settings for model fitting and peak calling"
             help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
        <option value="preSet">Commonly used</option>
        <option value="full">Full parameter list</option>
      </param>
      <when value="preSet" />
      <when value="full">
        <param name="signalModel" type="select"
               label="Signal model"
               help="By default, signal model is chosen using BIC. Instead, user can specify signal model among one or two signal component models.">
          <option value="BIC">Automatic model selection based on BIC</option>
          <option value="1S">One-signal-component model</option>
          <option value="2S">Two-signal-component model</option>
        </param>
        <param name="d" type="float" value="0.25"
               label="d"
               help="Parameter for estimating background distribution. Default is 0.25." />
        <param name="maxgap" type="integer" value="200"
               label="maxgap"
               help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
        <param name="minsize" type="integer" value="50"
               label="minsize"
               help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
        <param name="thres" type="integer" value="10"
               label="thres"
               help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
      </when> <!-- full -->
    </conditional> <!-- fitParams -->
  </inputs>

  <outputs>
    <!-- Peak list: tabular unless BED or GFF was selected in OutfileFormat. -->
    <data format="tabular" name="out_peak">
      <change_format>
        <when input="OutfileFormat" value="bed" format="bed" />
        <when input="OutfileFormat" value="gff" format="gff" />
      </change_format>
    </data>
    <!-- Diagnostic reports, each created only when its checkbox is ticked. -->
    <data format="txt" name="report_summary">
      <filter>summary == 1</filter>
    </data>
    <data format="pdf" name="report_gof">
      <filter>gof == 1</filter>
    </data>
    <data format="pdf" name="report_exploratory">
      <filter>exploratory == 1</filter>
    </data>
  </outputs>

  <help>

**What it does**

MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
MOSAiCS is also available in Bioconductor_ as an R package.
We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.

Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," To appear in the *Journal of the American Statistical Association*.

.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.8/bioc/html/mosaics.html
.. _Google group: http://groups.google.com/group/mosaics_user_group
.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706

------

**Input formats**

MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts single-end reads, in Eland result, Eland extended, Eland export, Bowtie default, and SAM formats.

------

**Outputs**

Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.

If the output is a table, it has the following columns::

    Column    Description
    --------  --------------------------------------------------------
    1         Chromosome of the peak
    2         Start position of the peak
    3         End position of the peak
    4         Width of the peak
    5         Averaged posterior probability of the peak
    6         Minimum posterior probability of the peak
    7         Averaged ChIP tag counts of the peak
    8         Maximum ChIP tag counts of the peak
    9         Averaged control tag counts of the peak
    10        Averaged control tag counts of the peak, scaled by sequencing depth
    11        Averaged log base 2 ratio of ChIP over input tag counts

If the output is in BED format, it has the following columns::

    Column        Description
    ------------  --------------------------------------------------------
    1 chrom       Chromosome of the peak
    2 chromStart  Start position of the peak
    3 chromEnd    End position of the peak
    4 name        Always "MOSAiCS_peak"
    5 score       Averaged ChIP tag counts of the peak

If the output is in GFF format, it has the following columns::

    Column     Description
    ---------  --------------------------------------------------------
    1 seqname  Chromosome of the peak
    2 source   Always "MOSAiCS"
    3 feature  Always "MOSAiCS_peak"
    4 start    Start position of the peak
    5 end      End position of the peak
    6 score    Averaged ChIP tag counts of the peak
    7 strand   Always "."
    8 frame    Always "."
    9 group    Always "."

------

**Reports for diagnostics**

*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and a brief summary of peak calling results.

*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data.

*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.

More details regarding these reports can be found here_.

------

**Settings for model fitting and peak calling**

More details about the tuning of these parameters can be found here_.

.. _here: http://www.bioconductor.org/packages/2.8/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf

  </help>
</tool>