Mercurial > repos > dongjun > mosaics
view mosaics.xml @ 4:f1880e32460e draft
Uploaded
author | dongjun |
---|---|
date | Thu, 10 Jan 2013 15:57:13 -0500 |
parents | 95a657f15ba7 |
children |
line wrap: on
line source
<tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="2.0.0">
  <description></description>
  <parallelism method="basic"></parallelism>
  <requirements>
    <requirement type="binary">R</requirement>
  </requirements>

  <!--
    Command line handed to mosaics_wrapper.pl. All arguments are positional,
    so their order below is significant (presumably the wrapper reads them by
    position; confirm against mosaics_wrapper.pl before reordering).
    Each Cheetah "##" comment and each "#if / #else / #end if" directive must
    stay on its own line: a "##" comment runs to the end of the line and would
    otherwise swallow the arguments that follow it.
  -->
  <command interpreter="perl">
    mosaics_wrapper.pl

    ## ChIP file info
    $readFileType.chipParams.chip
    $readFileType.chipParams.chipFileFormat

    ## control file info
    $readFileType.controlParams.control
    $readFileType.controlParams.controlFileFormat

    ## peak file info
    $out_peak
    $OutfileFormat

    ## analysis type
    IO

    ## optional output
    $report_summary
    $report_gof
    $report_exploratory

    ## settings for model fitting and peak calling: required (form defaults: FALSE, FALSE, 0.05, 200, 200, 0)
    $readFileType.pet
    $by_chr
    $fdrLevel
    $fragLen
    $binSize
    $capping

    #if $fitParams.fSettingsType == "preSet"
      ## settings for model fitting and peak calling: optional
      BIC
      automatic
      0.25
      200
      50
      10
      ## setting for parallel computing
      TRUE
      8
    #else
      $fitParams.signalModel
      $fitParams.bgEst
      $fitParams.d
      $fitParams.maxgap
      $fitParams.minsize
      $fitParams.thres
      $fitParams.parallel
      $fitParams.nCore
    #end if
  </command>

  <inputs>
    <!--
      Outer switch: single-end (pet=FALSE) versus paired-end (pet=TRUE) data.
      Each branch offers its own list of accepted file formats for the ChIP
      and the control sample.
    -->
    <conditional name="readFileType">
      <param name="pet" type="select" label="Paired-end tag (PET) or single-end tag (SET) data">
        <option value="FALSE">Single-end tag (SET) data</option>
        <option value="TRUE">Paired-end tag (PET) data</option>
      </param>

      <when value="FALSE">
        <!-- SET -->
        <conditional name="chipParams">
          <param name="chipFileFormat"
                 type="select"
                 label="Select file format for ChIP sample"
                 help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM file formats for single-end tag (SET) data.">
            <option value="eland_result">Eland result</option>
            <option value="eland_extended">Eland extended</option>
            <option value="eland_export">Eland export</option>
            <option value="bowtie">Bowtie default</option>
            <option value="sam">SAM</option>
            <option value="bed">BED</option>
            <option value="csem">CSEM</option>
          </param>
          <when value="eland_result">
            <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
          </when>
          <when value="eland_extended">
            <param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
          </when>
          <when value="eland_export">
            <param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
          </when>
          <when value="bowtie">
            <!-- No format restriction is declared for Bowtie default files. -->
            <param name="chip" type="data" label="Bowtie default file for ChIP sample"/>
          </when>
          <when value="sam">
            <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
          </when>
          <when value="bed">
            <param name="chip" type="data" format="bed" label="BED file for ChIP sample"/>
          </when>
          <when value="csem">
            <param name="chip" type="data" format="csem" label="CSEM file for ChIP sample"/>
          </when>
        </conditional>
        <!-- chipParams -->

        <conditional name="controlParams">
          <param name="controlFileFormat"
                 type="select"
                 label="Select file format for control sample"
                 help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM file formats for single-end tag (SET) data.">
            <option value="eland_result">Eland result</option>
            <option value="eland_extended">Eland extended</option>
            <option value="eland_export">Eland export</option>
            <option value="bowtie">Bowtie default</option>
            <option value="sam">SAM</option>
            <option value="bed">BED</option>
            <option value="csem">CSEM</option>
          </param>
          <when value="eland_result">
            <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
          </when>
          <when value="eland_extended">
            <param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
          </when>
          <when value="eland_export">
            <param name="control" type="data" format="eland" label="Eland export file for control sample"/>
          </when>
          <when value="bowtie">
            <!-- No format restriction is declared for Bowtie default files. -->
            <param name="control" type="data" label="Bowtie default file for control sample"/>
          </when>
          <when value="sam">
            <param name="control" type="data" format="sam" label="SAM file for control sample"/>
          </when>
          <when value="bed">
            <param name="control" type="data" format="bed" label="BED file for control sample"/>
          </when>
          <when value="csem">
            <param name="control" type="data" format="csem" label="CSEM file for control sample"/>
          </when>
        </conditional>
        <!-- controlParams -->
      </when>

      <when value="TRUE">
        <!-- PET -->
        <conditional name="chipParams">
          <param name="chipFileFormat"
                 type="select"
                 label="Select file format for ChIP sample"
                 help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result and SAM file formats for paired-end tag (PET) data.">
            <option value="eland_result">Eland result</option>
            <option value="sam">SAM</option>
          </param>
          <when value="eland_result">
            <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
          </when>
          <when value="sam">
            <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
          </when>
        </conditional>
        <!-- chipParams -->

        <conditional name="controlParams">
          <param name="controlFileFormat"
                 type="select"
                 label="Select file format for control sample"
                 help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result and SAM file formats for paired-end tag (PET) data.">
            <option value="eland_result">Eland result</option>
            <option value="sam">SAM</option>
          </param>
          <when value="eland_result">
            <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
          </when>
          <when value="sam">
            <param name="control" type="data" format="sam" label="SAM file for control sample"/>
          </when>
        </conditional>
        <!-- controlParams -->
      </when>
    </conditional>
    <!-- readFileType -->

    <!-- Format of the peak list; also drives the datatype of out_peak (see change_format in outputs). -->
    <param name="OutfileFormat"
           type="select"
           label="Select file format for peak calling results"
           help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table.">
      <option value="bed">BED</option>
      <option value="gff">GFF</option>
      <option value="txt">table</option>
    </param>

    <!-- Optional diagnostic reports; each checkbox is tested by the filter of the matching output. -->
    <param name="summary"
           type="boolean"
           truevalue="1"
           falsevalue="0"
           display="checkboxes"
           label="Reports for diagnostics: Summary of model fitting and peak calling" />
    <param name="gof"
           type="boolean"
           truevalue="1"
           falsevalue="0"
           display="checkboxes"
           label="Reports for diagnostics: Goodness of fit (GOF) plots" />
    <param name="exploratory"
           type="boolean"
           truevalue="1"
           falsevalue="0"
           display="checkboxes"
           label="Reports for diagnostics: Plots of exploratory analysis" />

    <!-- Required settings, always passed to the wrapper. -->
    <param name="by_chr"
           type="select"
           label="Genome-wide analysis or chromosome-wise analysis"
           help="If genome-wide analysis is used, one model is fitted for all the chromosomes. If chromosome-wise analysis is used, different model is fitted for each chromosome separately.">
      <option value="FALSE">Genome-wide analysis</option>
      <option value="TRUE">Chromosome-wise analysis</option>
    </param>
    <param name="fdrLevel"
           type="float"
           value="0.05"
           min="0"
           max="1"
           label="False discovery rate (FDR)"
           help="FDR level for peak detection (default: 0.05)" />
    <param name="fragLen"
           type="integer"
           value="200"
           label="Average fragment length"
           help="Default: 200." />
    <param name="binSize"
           type="integer"
           value="200"
           label="Bin size"
           help="By default, bin size equals to the average fragment length." />
    <param name="capping"
           type="integer"
           value="0"
           label="Maximum number of reads allowed to start at each nucleotide position"
           help="If non-positive value is specified (e.g., 0), any number of reads are allowed at each nucleotide position (i.e., no filtering). By default, filtering is NOT used." />

    <!--
      Optional settings. With "preSet" the command template passes fixed
      literals (BIC, automatic, 0.25, 200, 50, 10, TRUE, 8); with "full" the
      values below are passed instead. The defaults below mirror those literals.
    -->
    <conditional name="fitParams">
      <param name="fSettingsType"
             type="select"
             label="Settings for model fitting and peak calling"
             help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
        <option value="preSet">Commonly used</option>
        <option value="full">Full parameter list</option>
      </param>
      <when value="preSet" />
      <when value="full">
        <param name="signalModel"
               type="select"
               label="Signal model"
               help="By default, signal model is chosen using BIC.">
          <option value="BIC">Automatic model selection based on BIC</option>
          <option value="1S">One-signal-component model</option>
          <option value="2S">Two-signal-component model</option>
        </param>
        <param name="bgEst"
               type="select"
               label="Background estimation approach"
               help="By default, background estimation approach is automatically determined based on the data.">
          <option value="automatic">Automatic selection based on the data</option>
          <option value="matchLow">Based on bins with low tag counts</option>
          <option value="rMOM">Robust method of moment (MOM)</option>
        </param>
        <param name="d"
               type="float"
               value="0.25"
               label="d"
               help="Parameter for estimating background distribution. Default is 0.25." />
        <param name="maxgap"
               type="integer"
               value="200"
               label="maxgap"
               help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
        <param name="minsize"
               type="integer"
               value="50"
               label="minsize"
               help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
        <param name="thres"
               type="integer"
               value="10"
               label="thres"
               help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
        <param name="parallel" type="select" label="Use parallel computing?">
          <option value="TRUE">Use parallel computing.</option>
          <option value="FALSE">NOT use parallel computing.</option>
        </param>
        <param name="nCore"
               type="integer"
               value="8"
               label="Number of CPUs"
               help="Number of CPUs used for parallel computing. Relevant only when parallel computing is used. Default is to use 8 CPUs." />
      </when>
      <!-- full -->
    </conditional>
    <!-- fitParams -->
  </inputs>

  <outputs>
    <!-- Peak list: tabular unless OutfileFormat selects bed or gff. -->
    <data format="tabular" name="out_peak">
      <change_format>
        <when input="OutfileFormat" value="bed" format="bed" />
        <when input="OutfileFormat" value="gff" format="gff" />
      </change_format>
    </data>
    <!-- Diagnostic reports, each guarded by a filter on its checkbox. -->
    <data format="txt" name="report_summary">
      <filter>summary == 1</filter>
    </data>
    <data format="pdf" name="report_gof">
      <filter>gof == 1</filter>
    </data>
    <data format="pdf" name="report_exploratory">
      <filter>exploratory == 1</filter>
    </data>
  </outputs>

  <help>
**What it does**

MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions). MOSAiCS is also available in Bioconductor_ as a R package.

We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.

Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," *Journal of the American Statistical Association*, Vol. 106, pp. 891--903.

.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.11/bioc/html/mosaics.html
.. _Google group: http://groups.google.com/group/mosaics_user_group
.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706

------

**Input formats**

MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM formats for single-end tag (SET) data. For paired-end tag (PET) data, MOSAiCS accepts Eland result and SAM formats.

------

**Outputs**

Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.

If the output is a table, it has the following columns::

    Column    Description
    --------  --------------------------------------------------------
    1         Chromosome of the peak
    2         Start position of the peak
    3         End position of the peak
    4         Width of the peak
    5         Averaged posterior probability of the peak
    6         Minimum posterior probability of the peak
    7         Averaged ChIP tag counts of the peak
    8         Maximum ChIP tag counts of the peak
    9         Averaged control tag counts of the peak
    10        Averaged control tag counts of the peak, scaled by sequencing depth
    11        Averaged log base 2 ratio of ChIP over input tag counts

If the output is in BED format, it has the following columns::

    Column        Description
    ------------  --------------------------------------------------------
    1 chrom       Chromosome of the peak
    2 chromStart  Start position of the peak
    3 chromEnd    End position of the peak
    4 name        Always "MOSAiCS_peak"
    5 score       Averaged ChIP tag counts of the peak

If the output is in GFF format, it has the following columns::

    Column     Description
    ---------  --------------------------------------------------------
    1 seqname  Chromosome of the peak
    2 source   Always "MOSAiCS"
    3 feature  Always "MOSAiCS_peak"
    4 start    Start position of the peak
    5 end      End position of the peak
    6 score    Averaged ChIP tag counts of the peak
    7 strand   Always "."
    8 frame    Always "."
    9 group    Always "."

------

**Reports for diagnostics**

*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and brief summary of peak calling results.

*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data.

*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.

More details regarding these reports can be found here_:

------

**Settings for model fitting and peak calling**

More details about the tuning of these parameters can be found here_:

.. _here: http://www.bioconductor.org/packages/2.11/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf
  </help>
</tool>