view mosaics.xml @ 0:b2567f7ff12f

Uploaded
author dongjun
date Wed, 21 Sep 2011 03:27:06 -0400
parents
children
line wrap: on
line source

<tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="1.0.0">
  
  <!-- The tool has no short description; the full title lives in the tool's name attribute. -->
  <description/>

  <parallelism method="basic"/>

  <!-- Declares the R binary as a runtime requirement of this tool. -->
  <requirements>
    <requirement type="binary">R</requirement>
  </requirements>

  <!--
    Builds the argument list for mosaics_wrapper.pl (run with perl).
    All arguments are passed positionally, one per line below. Lines starting
    with a double hash are Cheetah comments and do not reach the command line.
  -->
  <command interpreter="perl">
    mosaics_wrapper.pl
      ## aligned read files: ChIP sample, then control sample
      $chipParams.chip
      $controlParams.control
      ## aligned read file formats: ChIP sample, then control sample
      $chipParams.chipFileFormat
      $controlParams.controlFileFormat
      ## peak output file
      $out_peak
      ## peak output format (bed, gff or txt, as chosen in OutfileFormat)
      $OutfileFormat
      ## analysis type, fixed to IO
      ## NOTE(review): presumably the wrapper's input-only two-sample mode; confirm against mosaics_wrapper.pl
      IO
      ## optional diagnostic report outputs (summary, GOF plots, exploratory plots)
      $report_summary
      $report_gof
      $report_exploratory
      ## required settings for model fitting and peak calling:
      ## FDR level, fragment length, bin size, capping (defaults are set on the matching inputs)
      $fdrLevel
      $fragLen
      $binSize
      $capping
      ## optional settings for model fitting and peak calling:
      ## signal model, d, maxgap, minsize, thres
      #if $fitParams.fSettingsType == "preSet"
        ## "Commonly used" preset: same values as the defaults of the full parameter list
        BIC
        0.25
        200
        50
        10
      #else
        $fitParams.signalModel
        $fitParams.d
        $fitParams.maxgap
        $fitParams.minsize
        $fitParams.thres
      #end if
      ## number of cores: use the slots Galaxy allocated to this job;
      ## falls back to 8 (the previously hard-coded value) when GALAXY_SLOTS is not set
      \${GALAXY_SLOTS:-8}
  </command>

  <inputs>
	<!-- ChIP sample: the selected read-file format decides which dataset selector is offered. -->
	<conditional name="chipParams">
		<param
			name="chipFileFormat"
			type="select"
			label="Select file format for ChIP sample"
			help="MOSAiCS can accept aligned read files.">
			<option value="eland_result">Eland result</option>
			<option value="eland_extended">Eland extended</option>
			<option value="eland_export">Eland export</option>
			<option value="bowtie">Bowtie default</option>
			<option value="sam">SAM</option>
		</param>

		<!-- The three Eland variants all select datasets of format "eland". -->
		<when value="eland_result">
			<param name="chip" type="data" format="eland"
				label="Eland result file for ChIP sample"/>
		</when>
		<when value="eland_extended">
			<param name="chip" type="data" format="eland"
				label="Eland extended file for ChIP sample"/>
		</when>
		<when value="eland_export">
			<param name="chip" type="data" format="eland"
				label="Eland export file for ChIP sample"/>
		</when>

		<!-- No format attribute is declared for Bowtie default output. -->
		<when value="bowtie">
			<param name="chip" type="data"
				label="Bowtie default file for ChIP sample"/>
		</when>

		<when value="sam">
			<param name="chip" type="data" format="sam"
				label="SAM file for ChIP sample"/>
		</when>
	</conditional>
	<!-- Control sample: the selected read-file format decides which dataset selector is offered. -->
	<conditional name="controlParams">
		<param
			name="controlFileFormat"
			type="select"
			label="Select file format for control sample"
			help="MOSAiCS can accept aligned read files.">
			<option value="eland_result">Eland result</option>
			<option value="eland_extended">Eland extended</option>
			<option value="eland_export">Eland export</option>
			<option value="bowtie">Bowtie default</option>
			<option value="sam">SAM</option>
		</param>

		<!-- The three Eland variants all select datasets of format "eland". -->
		<when value="eland_result">
			<param name="control" type="data" format="eland"
				label="Eland result file for control sample"/>
		</when>
		<when value="eland_extended">
			<param name="control" type="data" format="eland"
				label="Eland extended file for control sample"/>
		</when>
		<when value="eland_export">
			<param name="control" type="data" format="eland"
				label="Eland export file for control sample"/>
		</when>

		<!-- No format attribute is declared for Bowtie default output. -->
		<when value="bowtie">
			<param name="control" type="data"
				label="Bowtie default file for control sample"/>
		</when>

		<when value="sam">
			<param name="control" type="data" format="sam"
				label="SAM file for control sample"/>
		</when>
	</conditional>
	
	<!-- Format of the peak list; the out_peak output changes its datatype for bed and gff. -->
	<param
		name="OutfileFormat"
		type="select"
		label="Select file format for peak calling results"
		help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table.">
		<option value="bed">BED</option>
		<option value="gff">GFF</option>
		<option value="txt">table</option>
	</param>

	<!-- Diagnostic report switches; each one gates the matching report_* output through its filter. -->
	<param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes"
		label="Reports for diagnostics: Summary of model fitting and peak calling"/>
	<param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes"
		label="Reports for diagnostics: Goodness of fit (GOF) plots"/>
	<param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes"
		label="Reports for diagnostics: Plots of exploratory analysis"/>
	
	<!-- Required settings for model fitting and peak calling; passed to the wrapper as FDR, fragment length, bin size, capping. -->
	<param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" />
	<!-- Fragment length and bin size are lengths, so values below 1 are rejected in the form (same kind of range check as fdrLevel). -->
	<param name="fragLen" type="integer" value="200" min="1" label="Average fragment length" help="Default: 200." />
	<param name="binSize" type="integer" value="200" min="1" label="Bin size" help="By default, bin size equals to the average fragment length." />
	<!-- NOTE(review): capping is deliberately left without a lower bound; confirm with the wrapper whether non-positive values mean "no capping". -->
	<param name="capping" type="integer" value="3" label="Maximum number of reads allowed to start at each nucleotide position" help="Small value (e.g., 3) are recommended for the ChIP-seq data with low sequencing depth and large value (e.g., 10000) for the ChIP-seq data with high sequencing depth." />
	
	<!-- Optional settings for model fitting and peak calling: fixed preset or full parameter list. -->
	<conditional name="fitParams">
		<param
			name="fSettingsType"
			type="select"
			label="Settings for model fitting and peak calling"
			help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
			<option value="preSet">Commonly used</option>
			<option value="full">Full parameter list</option>
		</param>

		<!-- Preset: no extra inputs; the command template supplies fixed values (BIC, 0.25, 200, 50, 10). -->
		<when value="preSet"/>

		<!-- Full list: every optional setting is exposed, with defaults equal to the preset values. -->
		<when value="full">
			<param
				name="signalModel"
				type="select"
				label="Signal model"
				help="By default, signal model is chosen using BIC. Instead, user can specify signal model among one or two signal component models.">
				<option value="BIC">Automatic model selection based on BIC</option>
				<option value="1S">One-signal-component model</option>
				<option value="2S">Two-signal-component model</option>
			</param>
			<param name="d" type="float" value="0.25" label="d"
				help="Parameter for estimating background distribution. Default is 0.25."/>
			<param name="maxgap" type="integer" value="200" label="maxgap"
				help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200."/>
			<param name="minsize" type="integer" value="50" label="minsize"
				help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50."/>
			<param name="thres" type="integer" value="10" label="thres"
				help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10."/>
		</when>
	</conditional>
  </inputs>

  <!--
    Outputs: the peak list is always produced; each diagnostic report is only
    created when the matching checkbox input (summary, gof, exploratory) is set.
  -->
  <outputs>
    <!-- Peak list: tabular by default, switched to bed or gff according to OutfileFormat. -->
    <data name="out_peak" format="tabular">
      <change_format>
        <when input="OutfileFormat" value="bed" format="bed"/>
        <when input="OutfileFormat" value="gff" format="gff"/>
      </change_format>
    </data>

    <!-- Text summary of model fitting and peak calling. -->
    <data name="report_summary" format="txt">
      <filter>summary == 1</filter>
    </data>

    <!-- Goodness of fit plots. -->
    <data name="report_gof" format="pdf">
      <filter>gof == 1</filter>
    </data>

    <!-- Exploratory analysis plots. -->
    <data name="report_exploratory" format="pdf">
      <filter>exploratory == 1</filter>
    </data>
  </outputs>

  <help>

**What it does**

MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
MOSAiCS is also available in Bioconductor_ as an R package.
We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.

Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," To appear in the *Journal of the American Statistical Association*.

.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.8/bioc/html/mosaics.html
.. _Google group: http://groups.google.com/group/mosaics_user_group
.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706

------

**Input formats**

MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts single-end reads, in Eland result, Eland extended, Eland export, Bowtie default, and SAM formats.

------

**Outputs**

Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.

If the output is a table, it has the following columns::

   Column    Description
   --------  --------------------------------------------------------
     1       Chromosome of the peak
     2       Start position of the peak
     3       End position of the peak
     4       Width of the peak
     5       Averaged posterior probability of the peak
     6       Minimum posterior probability of the peak
     7       Averaged ChIP tag counts of the peak
     8       Maximum ChIP tag counts of the peak
     9       Averaged control tag counts of the peak
    10       Averaged control tag counts of the peak, scaled by sequencing depth
    11       Averaged log base 2 ratio of ChIP over input tag counts

If the output is in BED format, it has the following columns::

    Column        Description
    ------------  --------------------------------------------------------
    1 chrom       Chromosome of the peak
    2 chromStart  Start position of the peak
    3 chromEnd    End position of the peak
    4 name        Always "MOSAiCS_peak"
    5 score       Averaged ChIP tag counts of the peak

If the output is in GFF format, it has the following columns::

    Column     Description
    ---------  --------------------------------------------------------
    1 seqname  Chromosome of the peak
    2 source   Always "MOSAiCS"
    3 feature  Always "MOSAiCS_peak"
    4 start    Start position of the peak
    5 end      End position of the peak
    6 score    Averaged ChIP tag counts of the peak
    7 strand   Always "."
    8 frame    Always "."
    9 group    Always "."

------

**Reports for diagnostics**

*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and brief summary of peak calling results. 

*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data. 

*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.

More details regarding these reports can be found here_.

------

**Settings for model fitting and peak calling**

More details about the tuning of these parameters can be found here_.

.. _here: http://www.bioconductor.org/packages/2.8/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf

  </help>
</tool>