changeset 1:f0b6fb422967 draft

Deleted selected files
author dongjun
date Thu, 10 Jan 2013 15:55:39 -0500
parents b2567f7ff12f
children b6d0c6ceda2c
files mosaics.xml mosaics_1.0.7.tar.gz mosaics_wrapper.pl
diffstat 3 files changed, 0 insertions(+), 352 deletions(-) [+]
line wrap: on
line diff
--- a/mosaics.xml	Wed Sep 21 03:27:06 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,241 +0,0 @@
-<tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="1.0.0">
-  
-  <description></description>
-	
-  <parallelism method="basic"></parallelism>
-  
-  <requirements>
-	  <requirement type="binary">R</requirement>
-  </requirements>
-
-  <command interpreter="perl">
-    mosaics_wrapper.pl 
-      ## input file name (chip and control)
-      $chipParams.chip
-      $controlParams.control
-      ## input file format (chip and control)
-      $chipParams.chipFileFormat
-      $controlParams.controlFileFormat
-      ## peak file name
-      $out_peak
-      ## peak file format
-      $OutfileFormat
-      ## analysis type
-      IO
-      ## optional output
-      $report_summary
-      $report_gof
-      $report_exploratory
-      ## settings for model fitting and peak calling: required (0.05, 200, 50)
-      $fdrLevel
-      $fragLen
-      $binSize
-      $capping
-      ## settings for model fitting and peak calling: optional
-      #if $fitParams.fSettingsType == "preSet"
-	BIC
-	0.25
-	200
-	50
-	10
-      #else
-	$fitParams.signalModel
-	$fitParams.d
-	$fitParams.maxgap
-	$fitParams.minsize	
-	$fitParams.thres
-      #end if
-      ## Number of cores to use
-      8
-  </command>
-
-  <inputs>
-	<conditional name="chipParams">
-		<param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS can accept aligned read files.">
-			<option value="eland_result">Eland result</option>
-			<option value="eland_extended">Eland extended</option>
-			<option value="eland_export">Eland export</option>
-			<option value="bowtie">Bowtie default</option>
-			<option value="sam">SAM</option>
-		</param>
-		<when value="eland_result">
-			<param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
-		</when>
-		<when value="eland_extended">
-			<param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
-		</when>
-		<when value="eland_export">
-			<param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
-		</when>
-		<when value="bowtie">
-			<param name="chip" type="data" label="Bowtie default file for ChIP sample"/>
-		</when>
-		<when value="sam">
-			<param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
-		</when>
-	</conditional> <!-- chipParams -->
-	<conditional name="controlParams">
-		<param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS can accept aligned read files.">
-			<option value="eland_result">Eland result</option>
-			<option value="eland_extended">Eland extended</option>
-			<option value="eland_export">Eland export</option>
-			<option value="bowtie">Bowtie default</option>
-			<option value="sam">SAM</option>
-		</param>
-		<when value="eland_result">
-			<param name="control" type="data" format="eland" label="Eland result file for control sample"/>
-		</when>
-		<when value="eland_extended">
-			<param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
-		</when>
-		<when value="eland_export">
-			<param name="control" type="data" format="eland" label="Eland export file for control sample"/>
-		</when>
-		<when value="bowtie">
-			<param name="control" type="data" label="Bowtie default file for control sample"/>
-		</when>
-		<when value="sam">
-			<param name="control" type="data" format="sam" label="SAM file for control sample"/>
-		</when>
-	</conditional> <!-- controlParams -->
-	
-	<param name="OutfileFormat" type="select" label="Select file format for peak calling results" help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table.">
-		<option value="bed">BED</option>
-		<option value="gff">GFF</option>
-		<option value="txt">table</option>
-	</param>
-	<param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Summary of model fitting and peak calling" />
-	<param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Goodness of fit (GOF) plots" />
-	<param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Plots of exploratory analysis" />
-	
-	<param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" />
-	<param name="fragLen" type="integer" value="200" label="Average fragment length" help="Default: 200." />
-	<param name="binSize" type="integer" value="200" label="Bin size" help="By default, bin size equals to the average fragment length." />
-	<param name="capping" type="integer" value="3" label="Maximum number of reads allowed to start at each nucleotide position" help="Small value (e.g., 3) are recommended for the ChIP-seq data with low sequencing depth and large value (e.g., 10000) for the ChIP-seq data with high sequencing depth." />
-	
-	<conditional name="fitParams">
-		<param name="fSettingsType" type="select" label="Settings for model fitting and peak calling" help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
-			<option value="preSet">Commonly used</option>
-			<option value="full">Full parameter list</option>
-		</param>
-		<when value="preSet" />
-		<when value="full">
-			<param name="signalModel" type="select" label="Signal model" help="By default, signal model is chosen using BIC. Instead, user can specify signal model among one or two signal component models.">
-				<option value="BIC">Automatic model selection based on BIC</option>
-				<option value="1S">One-signal-component model</option>
-				<option value="2S">Two-signal-component model</option>
-			</param>
-			<param name="d" type="float" value="0.25" label="d" help="Parameter for estimating background distribution. Default is 0.25." />
-			<param name="maxgap" type="integer" value="200" label="maxgap" help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
-			<param name="minsize" type="integer" value="50" label="minsize" help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
-			<param name="thres" type="integer" value="10" label="thres" help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
-		</when> <!-- full -->
-	</conditional> <!-- fitParams -->
-  </inputs>
-
-  <outputs>
-	<data format="tabular" name="out_peak">
-		<change_format>
-			<when input="OutfileFormat" value="bed" format="bed" />
-			<when input="OutfileFormat" value="gff" format="gff" />
-		</change_format>
-	</data>
-	<data format="txt" name="report_summary">
-		<filter>summary == 1</filter>
-	</data>
-	<data format="pdf" name="report_gof">
-		<filter>gof == 1</filter>
-	</data>
-	<data format="pdf" name="report_exploratory">
-		<filter>exploratory == 1</filter>
-	</data>
-  </outputs>
-
-  <help>
-
-**What it does**
-
-MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
-MOSAiCS is also available in Bioconductor_ as an R package.
-We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.
-
-Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," To appear in the *Journal of the American Statistical Association*.
-
-.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.8/bioc/html/mosaics.html
-.. _Google group: http://groups.google.com/group/mosaics_user_group
-.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706
-
-------
-
-**Input formats**
-
-MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts single-end reads, in Eland result, Eland extended, Eland export, Bowtie default, and SAM formats.
-
-------
-
-**Outputs**
-
-Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.
-
-If the output is a table, it has the following columns::
-
-   Column    Description
-   --------  --------------------------------------------------------
-     1       Chromosome of the peak
-     2       Start position of the peak
-     3       End position of the peak
-     4       Width of the peak
-     5       Averaged posterior probability of the peak
-     6       Minimum posterior probability of the peak
-     7       Averaged ChIP tag counts of the peak
-     8       Maximum ChIP tag counts of the peak
-     9       Averaged control tag counts of the peak
-    10       Averaged control tag counts of the peak, scaled by sequencing depth
-    11       Averaged log base 2 ratio of ChIP over input tag counts
-
-If the output is in BED format, it has the following columns::
-
-    Column        Description
-    ------------  --------------------------------------------------------
-    1 chrom       Chromosome of the peak
-    2 chromStart  Start position of the peak
-    3 chromEnd    End position of the peak
-    4 name        Always "MOSAiCS_peak"
-    5 score       Averaged ChIP tag counts of the peak
-
-If the output is in GFF format, it has the following columns::
-
-    Column     Description
-    ---------  --------------------------------------------------------
-    1 seqname  Chromosome of the peak
-    2 source   Always "MOSAiCS"
-    3 feature  Always "MOSAiCS_peak"
-    4 start    Start position of the peak
-    5 end      End position of the peak
-    6 score    Averaged ChIP tag counts of the peak
-    7 strand   Always "."
-    8 frame    Always "."
-    9 group    Always "."
-
-------
-
-**Reports for diagnostics**
-
-*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and a brief summary of peak calling results.
-
-*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data. 
-
-*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.
-
-More details regarding these reports can be found here_.
-
-------
-
-**Settings for model fitting and peak calling**
-
-More details about the tuning of these parameters can be found here_.
-
-.. _here: http://www.bioconductor.org/packages/2.8/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf
-
-  </help>
-</tool>
Binary file mosaics_1.0.7.tar.gz has changed
--- a/mosaics_wrapper.pl	Wed Sep 21 03:27:06 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-# Wrapper for MOSAiCS
-# Written by Dongjun Chung, Sep. 15, 2011
-
-#!/usr/bin/env perl;
-use warnings;
-use strict;
-use File::Temp qw/tempfile/;
-use File::Temp qw/tempdir/;
-use File::Basename;
-
-# parse command arguments
-
-die "Usage: perl mosaics_wrapper.pl [chip_path] [control_path] [chip_file_format] [control_file_format] [peak_path] [peak_file_format] [analysis_type] [report_summary_path] [report_gof_path] [report_exploratory_path] [fdr_level] [frag_len] [bin_size] [capping] [signal_model] [d] [maxgap] [minsize] [thres] [n_core]" unless @ARGV == 20;
-
-my ( $chip_path, $control_path, $chip_file_format, $control_file_format, $peak_path, $peak_file_format, $analysis_type, $report_summary_path, $report_gof_path, $report_exploratory_path, $fdr_level, $frag_len, $bin_size, $capping, $signal_model, $d, $maxgap, $minsize, $thres, $n_core ) = @ARGV;
-
-# parse options: analysis type
-
-if ( $analysis_type ne "IO" ) {
-	print "Only 'IO' is supported for analysis type!\n";
-	exit 1;	
-}
-
-# parse options: ChIP, control, peak
-
-my ($chip_filename, $chip_dir) = fileparse($chip_path);
-my ($control_filename, $control_dir) = fileparse($control_path);
-my ($peak_filename, $peak_dir) = fileparse($peak_path);
-
-# parse options: report summary
-
-my $report_summary = "FALSE";
-my $summary_dir = "NULL";
-my $summary_filename = "NULL";
-if ( $report_summary_path ne "None" ) {
-	$report_summary = "TRUE";
-	($summary_filename, $summary_dir) = fileparse($report_summary_path);
-}
-
-# parse options: report GOF
-
-my $report_gof = "FALSE";
-my $gof_dir = "NULL";
-my $gof_filename = "NULL";
-if ( $report_gof_path ne "None" ) {
-	$report_gof = "TRUE";
-	($gof_filename, $gof_dir) = fileparse($report_gof_path);
-}
-
-# parse options: report exploratory analysis
-
-my $report_exploratory = "FALSE";
-my $exploratory_dir = "NULL";
-my $exploratory_filename = "NULL";
-if ( $report_exploratory_path ne "None" ) {
-	$report_exploratory = "TRUE";
-	($exploratory_filename, $exploratory_dir) = fileparse($report_exploratory_path);
-}
-
-# write an R script to run
-
-my $tempdir_bin = tempdir();
-
-my $cmd = qq|
-	suppressPackageStartupMessages(library(mosaics))
-	try( suppressPackageStartupMessages(library(rparallel)), silent=TRUE )
-	
-	mosaicsRunAll( 
-		chipDir="$chip_dir", 
-		chipFileName="$chip_filename", 
-		chipFileFormat="$chip_file_format", 
-		controlDir="$control_dir",
-		controlFileName="$control_filename", 
-		controlFileFormat="$control_file_format",
-		binfileDir="$tempdir_bin",
-		peakDir="$peak_dir",
-		peakFileName="$peak_filename",
-		peakFileFormat="$peak_file_format",
-		reportSummary=$report_summary, 
-		summaryDir="$summary_dir",
-		summaryFileName="$summary_filename",
-		reportExploratory=$report_exploratory,
-		exploratoryDir="$exploratory_dir",
-		exploratoryFileName="$exploratory_filename",
-		reportGOF=$report_gof,
-		gofDir="$gof_dir",
-		gofFileName="$gof_filename",
-		FDR=$fdr_level, 
-		fragLen=$frag_len, 
-		binSize=$bin_size, 
-		capping=$capping, 
-		analysisType="$analysis_type", 
-		d=$d, 
-		signalModel="$signal_model", 
-		maxgap=$maxgap, 
-		minsize=$minsize, 
-		thres=$thres, 
-		nCore=$n_core )
-	
-	q()
-	|;
-
-# run R
-
-open( FT, "| R --slave --vanilla >& /dev/null" ) or die "Couldn't call R!\n";
-print FT $cmd, "\n";
-close FT or die "Couldn't finish R!\n";
-
-exit;
-
-