# HG changeset patch
# User lgueguen
# Date 1538384876 14400
# Node ID de6d0b7c17afda1ef605530750dd6389a1848fe8
# Parent d86ccac2a66049d6b63f8cf0f37c2a010b97b1df
release 1.6.3
diff -r d86ccac2a660 -r de6d0b7c17af README.md
--- a/README.md Wed May 17 05:09:10 2017 -0400
+++ b/README.md Mon Oct 01 05:07:56 2018 -0400
@@ -1,6 +1,6 @@
------------------------------------------------------------
-SARTools-Galaxy: a galaxy wrapper for SARTools version 1.3.2
------------------------------------------------------------
+--------------------------------------------------------------------------------------
+SARTools-Galaxy: a galaxy wrapper for SARTools (Statistical Analysis of RNA-Seq Tools)
+--------------------------------------------------------------------------------------
[![Build Status](https://travis-ci.org/PF2-pasteur-fr/SARTools-Galaxy.svg?branch=master)](https://travis-ci.org/PF2-pasteur-fr/SARTools-Galaxy)
@@ -12,10 +12,9 @@
Requirements:
-------------
- R (3.3.0 or higher), Bio-conductor package
- SARTools package (1.3.2)
- other R packages: DESeq2 (1.12.0 or higher), edgeR (3.12.0 or higher), genefilter, xtable and knitr
- Rscript
+ These Galaxy tools need:
+ - R and the following R packages: SARTools, DESeq2, edgeR, genefilter, xtable and knitr.
+ - Rscript and optparse package
SARTools can be downloaded on github (https://github.com/PF2-pasteur-fr/SARTools). More information about installation can be found at this url.
diff -r d86ccac2a660 -r de6d0b7c17af abims_sartools_deseq2.xml
--- a/abims_sartools_deseq2.xml Wed May 17 05:09:10 2017 -0400
+++ b/abims_sartools_deseq2.xml Mon Oct 01 05:07:56 2018 -0400
@@ -26,6 +26,7 @@
--typeTrans $advanced_parameters.typeTrans
--locfunc $advanced_parameters.locfunc
--colors $advanced_parameters.colors
+ --forceCairoGraph $advanced_parameters.forceCairoGraph
#end if
## ouputs
@COMMAND_OUTPUTS@
@@ -45,23 +46,25 @@
-
+
+
-
-
+
+
-
+
-
+
+
@@ -85,7 +88,7 @@
@@ -142,6 +145,7 @@
* **typeTrans:** method of transformation of the counts for the clustering and the PCA (default is "VST" for Variance Stabilizing Transformation, or "rlog" for Regularized Log Transformation);
* **locfunc:** function used for the estimation of the size factors (default is "median", or "shorth" from the genefilter package);
* **colors:** colors used for the figures (one per biological condition), 8 are given by default.
+ * **forceCairoGraph:** TRUE or FALSE (default) to force the use of cairo with options(bitmapType="cairo").
------------
diff -r d86ccac2a660 -r de6d0b7c17af abims_sartools_deseq2_wrapper.py
--- a/abims_sartools_deseq2_wrapper.py Wed May 17 05:09:10 2017 -0400
+++ b/abims_sartools_deseq2_wrapper.py Mon Oct 01 05:07:56 2018 -0400
@@ -33,6 +33,7 @@
parser.add_argument('--typeTrans')
parser.add_argument('--locfunc')
parser.add_argument('--colors')
+ parser.add_argument('--forceCairoGraph')
parser.add_argument('--figures_html')
parser.add_argument('--figures_html_files_path')
parser.add_argument('--tables_html')
@@ -57,6 +58,7 @@
typeTrans=args.typeTrans
locfunc=args.locfunc
colors=args.colors
+ forceCairoGraph=args.forceCairoGraph
figures_html=args.figures_html
figures_html_files_path=args.figures_html_files_path
tables_html=args.tables_html
@@ -99,6 +101,8 @@
cmd+="--locfunc %s " % (locfunc)
if colors:
cmd+="--colors %s " % (colors)
+ if forceCairoGraph:
+ cmd+="--forceCairoGraph %s " % (forceCairoGraph)
cmd+="> %s 2>&1" % (log)
print("Rscript command: %s") % (cmd)
os.system(cmd)
diff -r d86ccac2a660 -r de6d0b7c17af abims_sartools_edger.xml
--- a/abims_sartools_edger.xml Wed May 17 05:09:10 2017 -0400
+++ b/abims_sartools_edger.xml Mon Oct 01 05:07:56 2018 -0400
@@ -25,6 +25,7 @@
--geneSelection $advanced_parameters.geneSelection
--normalizationMethod $advanced_parameters.normalizationMethod
--colors $advanced_parameters.colors
+ --forceCairoGraph $advanced_parameters.forceCairoGraph
#end if
## ouputs
@COMMAND_OUTPUTS@
@@ -46,17 +47,18 @@
-
-
+
+
-
+
+
@@ -159,6 +161,7 @@
* **gene.selection:** method of selection of the features for the MultiDimensional Scaling plot ("pairwise" by default or common);
* **normalizationMethod:** normalization method in calcNormFactors(): "TMM" (default), "RLE" (DESeq method) or "upperquartile";
* **colors:** colors used for the figures (one per biological condition), 8 are given by default.
+ * **forceCairoGraph:** TRUE or FALSE (default) to force the use of cairo with options(bitmapType="cairo").
------------
diff -r d86ccac2a660 -r de6d0b7c17af abims_sartools_edger_wrapper.py
--- a/abims_sartools_edger_wrapper.py Wed May 17 05:09:10 2017 -0400
+++ b/abims_sartools_edger_wrapper.py Mon Oct 01 05:07:56 2018 -0400
@@ -31,6 +31,7 @@
parser.add_argument('--geneSelection')
parser.add_argument('--normalizationMethod')
parser.add_argument('--colors')
+ parser.add_argument('--forceCairoGraph')
parser.add_argument('--figures_html')
parser.add_argument('--figures_html_files_path')
parser.add_argument('--tables_html')
@@ -53,6 +54,7 @@
geneSelection=args.geneSelection
normalizationMethod=args.normalizationMethod
colors=args.colors
+ forceCairoGraph=args.forceCairoGraph
figures_html=args.figures_html
figures_html_files_path=args.figures_html_files_path
tables_html=args.tables_html
@@ -91,6 +93,8 @@
cmd+="--normalizationMethod %s " % (normalizationMethod)
if colors:
cmd+="--colors %s " % (colors)
+ if forceCairoGraph:
+ cmd+="--forceCairoGraph %s " % (forceCairoGraph)
cmd+="> %s 2>&1" % (log)
print("Rscript command: %s") % (cmd)
os.system(cmd)
diff -r d86ccac2a660 -r de6d0b7c17af macros.xml
--- a/macros.xml Wed May 17 05:09:10 2017 -0400
+++ b/macros.xml Mon Oct 01 05:07:56 2018 -0400
@@ -1,11 +1,11 @@
- 1.3.2
+ 1.6.3
- r-sartools
- r-optparse
+ r-sartools
+ r-optparse
@@ -52,28 +52,28 @@
-
+
\S+
-
+
\S+
-
-
-
+
+
+
\S+
-
+
\S+
-
+
\S+
-
+
@@ -84,11 +84,11 @@
-
+
-
+
@@ -100,11 +100,15 @@
-
+
\S+
+
+
+
+
diff -r d86ccac2a660 -r de6d0b7c17af pre_sartools.xml
--- a/pre_sartools.xml Wed May 17 05:09:10 2017 -0400
+++ b/pre_sartools.xml Mon Oct 01 05:07:56 2018 -0400
@@ -1,5 +1,8 @@
generate design/target file and archive for SARTools inputs
+
+
+
pre_sartools.py
--outfile=$outfile
diff -r d86ccac2a660 -r de6d0b7c17af template_script_DESeq2_CL.r
--- a/template_script_DESeq2_CL.r Wed May 17 05:09:10 2017 -0400
+++ b/template_script_DESeq2_CL.r Mon Oct 01 05:07:56 2018 -0400
@@ -1,192 +1,192 @@
-#!/local/gensoft2/exe/R/3.1.2/bin/Rscript
-
-# to run this script, use one of these commands:
-# Rscript --no-save --no-restore --verbose template_script_DESeq2_CL.r -r raw -v group -c T0 > log.txt 2>&1
-# Rscript template_script_DESeq2_CL.r -r raw -v group -c T0
-
-# to get help:
-# Rscript template_script_DESeq2_CL.r --help
-
-################################################################################
-### R script to compare several conditions with the SARTools and DESeq2 packages
-### Hugo Varet
-### April 20th, 2015
-### designed to be executed with SARTools 1.1.0
-################################################################################
-
-rm(list=ls()) # remove all the objects from the R session
-library(optparse) # to run the script in command lines
-
-# options list with associated default value.
-option_list <- list(
-make_option(c("-P", "--projectName"),
- default=basename(getwd()),
- dest="projectName",
- help="name of the project used for the report [default: name of the current directory]."),
-
-make_option(c("-A", "--author"),
- default=Sys.info()[7],
- dest="author",
- help="name of the report author [default: %default]."),
-
-make_option(c("-t", "--targetFile"),
- default="target.txt",
- dest="targetFile",
- help="path to the design/target file [default: %default]."),
-
-make_option(c("-r", "--rawDir"),
- default="raw",
- dest="rawDir",
- help="path to the directory containing the HTSeq files [default: %default]."),
-
-make_option(c("-F", "--featuresToRemove"),
- default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
- dest="FTR",
- help="names of the features to be removed, more than once can be specified [default: %default]"),
-
-make_option(c("-v", "--varInt"),
- default="group",
- dest="varInt",
- help="factor of interest [default: %default]"),
-
-make_option(c("-c", "--condRef"),
- default="WT",
- dest="condRef",
- help="reference biological condition [default: %default]"),
-
-make_option(c("-b", "--batch"),
- default=NULL,
- dest="batch",
- help="blocking factor [default: %default] or \"batch\" for example"),
-
-make_option(c("-f", "--fitType"),
- default="parametric",
- dest="fitType",
- help="mean-variance relationship: [default: %default] or local"),
-
-make_option(c("-o", "--cooksCutoff"),
- default=TRUE,
- dest="cooksCutoff",
- help="perform the outliers detection (default is TRUE)"),
-
-make_option(c("-i", "--independentFiltering"),
- default=TRUE,
- dest="independentFiltering",
- help="perform independent filtering (default is TRUE)"),
-
-make_option(c("-a", "--alpha"),
- default=0.05,
- dest="alpha",
- help="threshold of statistical significance [default: %default]"),
-
-make_option(c("-p", "--pAdjustMethod"),
- default="BH",
- dest="pAdjustMethod",
- help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"),
-
-make_option(c("-T", "--typeTrans"),
- default="VST",
- dest="typeTrans",
- help="transformation for PCA/clustering: \"VST\" ou \"rlog\" [default: %default]"),
-
-make_option(c("-l", "--locfunc"),
- default="median",
- dest="locfunc",
- help="median or shorth to estimate the size factors [default: %default]"),
-
-make_option(c("-C", "--colors"),
- default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
- dest="cols",
- help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]")
-)
-
-# now parse the command line to check which option is given and get associated values
-parser <- OptionParser(usage="usage: %prog [options]",
- option_list=option_list,
- description="Compare two or more biological conditions in a RNA-Seq framework with DESeq2.",
- epilogue="For comments, bug reports etc... please contact Hugo Varet ")
-opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options
-
-# get options and arguments
-workDir <- getwd()
-projectName <- opt$projectName # name of the project
-author <- opt$author # author of the statistical analysis/report
-targetFile <- opt$targetFile # path to the design/target file
-rawDir <- opt$rawDir # path to the directory containing raw counts files
-featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example)
-varInt <- opt$varInt # factor of interest
-condRef <- opt$condRef # reference biological condition
-batch <- opt$batch # blocking factor: NULL (default) or "batch" for example
-fitType <- opt$fitType # mean-variance relationship: "parametric" (default) or "local"
-cooksCutoff <- opt$cooksCutoff # outliers detection threshold (NULL to let DESeq2 choosing it)
-independentFiltering <- opt$independentFiltering # TRUE/FALSE to perform independent filtering (default is TRUE)
-alpha <- as.numeric(opt$alpha) # threshold of statistical significance
-pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY"
-typeTrans <- opt$typeTrans # transformation for PCA/clustering: "VST" ou "rlog"
-locfunc <- opt$locfunc # "median" (default) or "shorth" to estimate the size factors
-colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots
-
-# print(paste("workDir", workDir))
-# print(paste("projectName", projectName))
-# print(paste("author", author))
-# print(paste("targetFile", targetFile))
-# print(paste("rawDir", rawDir))
-# print(paste("varInt", varInt))
-# print(paste("condRef", condRef))
-# print(paste("batch", batch))
-# print(paste("fitType", fitType))
-# print(paste("cooksCutoff", cooksCutoff))
-# print(paste("independentFiltering", independentFiltering))
-# print(paste("alpha", alpha))
-# print(paste("pAdjustMethod", pAdjustMethod))
-# print(paste("typeTrans", typeTrans))
-# print(paste("locfunc", locfunc))
-# print(paste("featuresToRemove", featuresToRemove))
-# print(paste("colors", colors))
-
-################################################################################
-### running script ###
-################################################################################
-# setwd(workDir)
-library(SARTools)
-
-# checking parameters
-problem <- checkParameters.DESeq2(projectName=projectName,author=author,targetFile=targetFile,
- rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt,
- condRef=condRef,batch=batch,fitType=fitType,cooksCutoff=cooksCutoff,
- independentFiltering=independentFiltering,alpha=alpha,pAdjustMethod=pAdjustMethod,
- typeTrans=typeTrans,locfunc=locfunc,colors=colors)
-if (problem) quit(save="yes")
-
-# loading target file
-target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch)
-
-# loading counts
-counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove)
-
-# description plots
-majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors)
-
-# analysis with DESeq2
-out.DESeq2 <- run.DESeq2(counts=counts, target=target, varInt=varInt, batch=batch,
- locfunc=locfunc, fitType=fitType, pAdjustMethod=pAdjustMethod,
- cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha)
-
-# PCA + clustering
-exploreCounts(object=out.DESeq2$dds, group=target[,varInt], typeTrans=typeTrans, col=colors)
-
-# summary of the analysis (boxplots, dispersions, diag size factors, export table, nDiffTotal, histograms, MA plot)
-summaryResults <- summarizeResults.DESeq2(out.DESeq2, group=target[,varInt], col=colors,
- independentFiltering=independentFiltering,
- cooksCutoff=cooksCutoff, alpha=alpha)
-
-# save image of the R session
-save.image(file=paste0(projectName, ".RData"))
-
-# generating HTML report
-writeReport.DESeq2(target=target, counts=counts, out.DESeq2=out.DESeq2, summaryResults=summaryResults,
- majSequences=majSequences, workDir=workDir, projectName=projectName, author=author,
- targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt,
- condRef=condRef, batch=batch, fitType=fitType, cooksCutoff=cooksCutoff,
- independentFiltering=independentFiltering, alpha=alpha, pAdjustMethod=pAdjustMethod,
- typeTrans=typeTrans, locfunc=locfunc, colors=colors)
+################################################################################
+### R script to compare several conditions with the SARTools and DESeq2 packages
+### Hugo Varet
+### March 20th, 2018
+### designed to be executed with SARTools 1.6.3
+### run "Rscript template_script_DESeq2_CL.r --help" to get some help
+################################################################################
+
+rm(list=ls()) # remove all the objects from the R session
+library(optparse) # to run the script in command lines
+
+# options list with associated default value.
+option_list <- list(
+make_option(c("-P", "--projectName"),
+ default=basename(getwd()),
+ dest="projectName",
+ help="name of the project used for the report [default: name of the current directory]."),
+
+make_option(c("-A", "--author"),
+ default=Sys.info()[7],
+ dest="author",
+ help="name of the report author [default: %default]."),
+
+make_option(c("-t", "--targetFile"),
+ default="target.txt",
+ dest="targetFile",
+ help="path to the design/target file [default: %default]."),
+
+make_option(c("-r", "--rawDir"),
+ default="raw",
+ dest="rawDir",
+ help="path to the directory containing the HTSeq files [default: %default]."),
+
+make_option(c("-F", "--featuresToRemove"),
+ default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
+ dest="FTR",
+ help="names of the features to be removed, more than once can be specified [default: %default]"),
+
+make_option(c("-v", "--varInt"),
+ default="group",
+ dest="varInt",
+ help="factor of interest [default: %default]"),
+
+make_option(c("-c", "--condRef"),
+ default="WT",
+ dest="condRef",
+ help="reference biological condition [default: %default]"),
+
+make_option(c("-b", "--batch"),
+ default=NULL,
+ dest="batch",
+ help="blocking factor [default: %default] or \"batch\" for example"),
+
+make_option(c("-f", "--fitType"),
+ default="parametric",
+ dest="fitType",
+ help="mean-variance relationship: [default: %default], local or mean"),
+
+make_option(c("-o", "--cooksCutoff"),
+ default=TRUE,
+ dest="cooksCutoff",
+ help="perform the outliers detection (default is TRUE)"),
+
+make_option(c("-i", "--independentFiltering"),
+ default=TRUE,
+ dest="independentFiltering",
+ help="perform independent filtering (default is TRUE)"),
+
+make_option(c("-a", "--alpha"),
+ default=0.05,
+ dest="alpha",
+ help="threshold of statistical significance [default: %default]"),
+
+make_option(c("-p", "--pAdjustMethod"),
+ default="BH",
+ dest="pAdjustMethod",
+ help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"),
+
+make_option(c("-T", "--typeTrans"),
+ default="VST",
+ dest="typeTrans",
+ help="transformation for PCA/clustering: \"VST\" ou \"rlog\" [default: %default]"),
+
+make_option(c("-l", "--locfunc"),
+ default="median",
+ dest="locfunc",
+ help="median or shorth to estimate the size factors [default: %default]"),
+
+make_option(c("-C", "--colors"),
+ default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
+ dest="cols",
+ help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]"),
+
+make_option(c("-g", "--forceCairoGraph"),
+ action="store_true",
+ default=FALSE,
+ dest="forceCairoGraph",
+ help="activate cairo type")
+
+)
+
+# now parse the command line to check which option is given and get associated values
+parser <- OptionParser(usage="usage: %prog [options]",
+ option_list=option_list,
+ description="Compare two or more biological conditions in a RNA-Seq framework with DESeq2.",
+ epilogue="For comments, bug reports etc... please contact Hugo Varet ")
+opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options
+
+# get options and arguments
+workDir <- getwd()
+projectName <- opt$projectName # name of the project
+author <- opt$author # author of the statistical analysis/report
+targetFile <- opt$targetFile # path to the design/target file
+rawDir <- opt$rawDir # path to the directory containing raw counts files
+featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example)
+varInt <- opt$varInt # factor of interest
+condRef <- opt$condRef # reference biological condition
+batch <- opt$batch # blocking factor: NULL (default) or "batch" for example
+fitType <- opt$fitType # mean-variance relationship: "parametric" (default), "local" or "mean"
+cooksCutoff <- opt$cooksCutoff # outliers detection threshold (NULL to let DESeq2 choosing it)
+independentFiltering <- opt$independentFiltering # TRUE/FALSE to perform independent filtering (default is TRUE)
+alpha <- as.numeric(opt$alpha) # threshold of statistical significance
+pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY"
+typeTrans <- opt$typeTrans # transformation for PCA/clustering: "VST" ou "rlog"
+locfunc <- opt$locfunc # "median" (default) or "shorth" to estimate the size factors
+colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots
+forceCairoGraph <- opt$forceCairoGraph # force cairo as plotting device if enabled
+# print(paste("workDir", workDir))
+# print(paste("projectName", projectName))
+# print(paste("author", author))
+# print(paste("targetFile", targetFile))
+# print(paste("rawDir", rawDir))
+# print(paste("varInt", varInt))
+# print(paste("condRef", condRef))
+# print(paste("batch", batch))
+# print(paste("fitType", fitType))
+# print(paste("cooksCutoff", cooksCutoff))
+# print(paste("independentFiltering", independentFiltering))
+# print(paste("alpha", alpha))
+# print(paste("pAdjustMethod", pAdjustMethod))
+# print(paste("typeTrans", typeTrans))
+# print(paste("locfunc", locfunc))
+# print(paste("featuresToRemove", featuresToRemove))
+# print(paste("colors", colors))
+
+################################################################################
+### running script ###
+################################################################################
+# setwd(workDir)
+library(SARTools)
+if (forceCairoGraph) options(bitmapType="cairo")
+
+# checking parameters
+problem <- checkParameters.DESeq2(projectName=projectName,author=author,targetFile=targetFile,
+ rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt,
+ condRef=condRef,batch=batch,fitType=fitType,cooksCutoff=cooksCutoff,
+ independentFiltering=independentFiltering,alpha=alpha,pAdjustMethod=pAdjustMethod,
+ typeTrans=typeTrans,locfunc=locfunc,colors=colors)
+if (problem) quit(save="yes")
+
+# loading target file
+target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch)
+
+# loading counts
+counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove)
+
+# description plots
+majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors)
+
+# analysis with DESeq2
+out.DESeq2 <- run.DESeq2(counts=counts, target=target, varInt=varInt, batch=batch,
+ locfunc=locfunc, fitType=fitType, pAdjustMethod=pAdjustMethod,
+ cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha)
+
+# PCA + clustering
+exploreCounts(object=out.DESeq2$dds, group=target[,varInt], typeTrans=typeTrans, col=colors)
+
+# summary of the analysis (boxplots, dispersions, diag size factors, export table, nDiffTotal, histograms, MA plot)
+summaryResults <- summarizeResults.DESeq2(out.DESeq2, group=target[,varInt], col=colors,
+ independentFiltering=independentFiltering,
+ cooksCutoff=cooksCutoff, alpha=alpha)
+
+# save image of the R session
+save.image(file=paste0(projectName, ".RData"))
+
+# generating HTML report
+writeReport.DESeq2(target=target, counts=counts, out.DESeq2=out.DESeq2, summaryResults=summaryResults,
+ majSequences=majSequences, workDir=workDir, projectName=projectName, author=author,
+ targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt,
+ condRef=condRef, batch=batch, fitType=fitType, cooksCutoff=cooksCutoff,
+ independentFiltering=independentFiltering, alpha=alpha, pAdjustMethod=pAdjustMethod,
+ typeTrans=typeTrans, locfunc=locfunc, colors=colors)
diff -r d86ccac2a660 -r de6d0b7c17af template_script_edgeR_CL.r
--- a/template_script_edgeR_CL.r Wed May 17 05:09:10 2017 -0400
+++ b/template_script_edgeR_CL.r Mon Oct 01 05:07:56 2018 -0400
@@ -1,175 +1,174 @@
-#!/local/gensoft2/exe/R/3.1.2/bin/Rscript
-
-# to run this script, use one of these commands:
-# Rscript --no-save --no-restore --verbose template_script_edgeR_CL.r -r raw -v group -c T0 > log.txt 2>&1
-# Rscript template_script_edgeR_CL.r -r raw -v group -c T0
-
-# to get help:
-# Rscript template_script_edgeR_CL.r --help
-
-################################################################################
-### R script to compare several conditions with the SARTools and edgeR packages
-### Hugo Varet
-### April 20th, 2015
-### designed to be executed with SARTools 1.1.0
-################################################################################
-
-rm(list=ls()) # remove all the objects from the R session
-library(optparse) # to run the script in command lines
-
-# options list with associated default value.
-option_list <- list(
-make_option(c("-P", "--projectName"),
- default=basename(getwd()),
- dest="projectName",
- help="name of the project used for the report [default: name of the current directory]."),
-
-make_option(c("-A", "--author"),
- default=Sys.info()[7],
- dest="author",
- help="name of the report author [default: %default]."),
-
-make_option(c("-t", "--targetFile"),
- default="target.txt",
- dest="targetFile",
- help="path to the design/target file [default: %default]."),
-
-make_option(c("-r", "--rawDir"),
- default="raw",
- dest="rawDir",
- help="path to the directory containing the HTSeq files [default: %default]."),
-
-make_option(c("-F", "--featuresToRemove"),
- default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
- dest="FTR",
- help="names of the features to be removed, more than once can be specified [default: %default]"),
-
-make_option(c("-v", "--varInt"),
- default="group",
- dest="varInt",
- help="factor of interest [default: %default]"),
-
-make_option(c("-c", "--condRef"),
- default="WT",
- dest="condRef",
- help="reference biological condition [default: %default]"),
-
-make_option(c("-b", "--batch"),
- default=NULL,
- dest="batch",
- help="blocking factor [default: %default] or \"batch\" for example"),
-
-make_option(c("-a", "--alpha"),
- default=0.05,
- dest="alpha",
- help="threshold of statistical significance [default: %default]"),
-
-make_option(c("-p", "--pAdjustMethod"),
- default="BH",
- dest="pAdjustMethod",
- help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"),
-
-make_option(c("-m", "--cpmCutoff"),
- default=1,
- dest="cpmCutoff",
- help="counts-per-million cut-off to filter low counts"),
-
-make_option(c("-g", "--gene.selection"),
- default="pairwise",
- dest="gene.selection",
- help="selection of the features in MDSPlot [default: %default]"),
-
-make_option(c("-n", "--normalizationMethod"),
- default="TMM",
- dest="normalizationMethod",
- help="normalization method in calcNormFactors: \"TMM\", \"RLE\" or \"upperquartile\" [default: %default]"),
-
-make_option(c("-C", "--colors"),
- default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
- dest="cols",
- help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]")
-)
-
-# now parse the command line to check which option is given and get associated values
-parser <- OptionParser(usage="usage: %prog [options]",
- option_list=option_list,
- description="Compare two or more biological conditions in a RNA-Seq framework with edgeR.",
- epilogue="For comments, bug reports etc... please contact Hugo Varet ")
-opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options
-
-# get options and arguments
-workDir <- getwd()
-projectName <- opt$projectName # name of the project
-author <- opt$author # author of the statistical analysis/report
-targetFile <- opt$targetFile # path to the design/target file
-rawDir <- opt$rawDir # path to the directory containing raw counts files
-featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example)
-varInt <- opt$varInt # factor of interest
-condRef <- opt$condRef # reference biological condition
-batch <- opt$batch # blocking factor: NULL (default) or "batch" for example
-alpha <- as.numeric(opt$alpha) # threshold of statistical significance
-pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY"
-gene.selection <- opt$gene.selection # selection of the features in MDSPlot
-normalizationMethod <- opt$normalizationMethod # normalization method in calcNormFactors
-cpmCutoff <- opt$cpmCutoff # counts-per-million cut-off to filter low counts
-colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots
-
-# print(paste("workDir", workDir))
-# print(paste("projectName", projectName))
-# print(paste("author", author))
-# print(paste("targetFile", targetFile))
-# print(paste("rawDir", rawDir))
-# print(paste("varInt", varInt))
-# print(paste("condRef", condRef))
-# print(paste("batch", batch))
-# print(paste("alpha", alpha))
-# print(paste("pAdjustMethod", pAdjustMethod))
-# print(paste("featuresToRemove", featuresToRemove))
-# print(paste("colors", colors))
-# print(paste("gene.selection", gene.selection))
-# print(paste("normalizationMethod", normalizationMethod))
-# print(paste("cpmCutoff", cpmCutoff))
-
-################################################################################
-### running script ###
-################################################################################
-# setwd(workDir)
-library(SARTools)
-
-# checking parameters
-problem <- checkParameters.edgeR(projectName=projectName,author=author,targetFile=targetFile,
- rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt,
- condRef=condRef,batch=batch,alpha=alpha,pAdjustMethod=pAdjustMethod,
- cpmCutoff=cpmCutoff,gene.selection=gene.selection,
- normalizationMethod=normalizationMethod,colors=colors)
-if (problem) quit(save="yes")
-
-# loading target file
-target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch)
-
-# loading counts
-counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove)
-
-# description plots
-majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors)
-
-# edgeR analysis
-out.edgeR <- run.edgeR(counts=counts, target=target, varInt=varInt, condRef=condRef,
- batch=batch, cpmCutoff=cpmCutoff, normalizationMethod=normalizationMethod,
- pAdjustMethod=pAdjustMethod)
-
-# MDS + clustering
-exploreCounts(object=out.edgeR$dge, group=target[,varInt], gene.selection=gene.selection, col=colors)
-
-# summary of the analysis (boxplots, dispersions, export table, nDiffTotal, histograms, MA plot)
-summaryResults <- summarizeResults.edgeR(out.edgeR, group=target[,varInt], counts=counts, alpha=alpha, col=colors)
-
-# save image of the R session
-save.image(file=paste0(projectName, ".RData"))
-
-# generating HTML report
-writeReport.edgeR(target=target, counts=counts, out.edgeR=out.edgeR, summaryResults=summaryResults,
- majSequences=majSequences, workDir=workDir, projectName=projectName, author=author,
- targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt,
- condRef=condRef, batch=batch, alpha=alpha, pAdjustMethod=pAdjustMethod, colors=colors,
- gene.selection=gene.selection, normalizationMethod=normalizationMethod)
+################################################################################
+### R script to compare several conditions with the SARTools and edgeR packages
+### Hugo Varet
+### May 16th, 2018
+### designed to be executed with SARTools 1.6.3
+### run "Rscript template_script_edgeR_CL.r --help" to get some help
+################################################################################
+
+rm(list=ls()) # remove all the objects from the R session
+library(optparse) # to run the script in command lines
+
+# options list with associated default value.
+option_list <- list(
+make_option(c("-P", "--projectName"),
+ default=basename(getwd()),
+ dest="projectName",
+ help="name of the project used for the report [default: name of the current directory]."),
+
+make_option(c("-A", "--author"),
+ default=Sys.info()[7],
+ dest="author",
+ help="name of the report author [default: %default]."),
+
+make_option(c("-t", "--targetFile"),
+ default="target.txt",
+ dest="targetFile",
+ help="path to the design/target file [default: %default]."),
+
+make_option(c("-r", "--rawDir"),
+ default="raw",
+ dest="rawDir",
+ help="path to the directory containing the HTSeq files [default: %default]."),
+
+make_option(c("-F", "--featuresToRemove"),
+ default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
+ dest="FTR",
+ help="names of the features to be removed, more than once can be specified [default: %default]"),
+
+make_option(c("-v", "--varInt"),
+ default="group",
+ dest="varInt",
+ help="factor of interest [default: %default]"),
+
+make_option(c("-c", "--condRef"),
+ default="WT",
+ dest="condRef",
+ help="reference biological condition [default: %default]"),
+
+make_option(c("-b", "--batch"),
+ default=NULL,
+ dest="batch",
+ help="blocking factor [default: %default] or \"batch\" for example"),
+
+make_option(c("-a", "--alpha"),
+ default=0.05,
+ dest="alpha",
+ help="threshold of statistical significance [default: %default]"),
+
+make_option(c("-p", "--pAdjustMethod"),
+ default="BH",
+ dest="pAdjustMethod",
+ help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"),
+
+make_option(c("-m", "--cpmCutoff"),
+ default=1,
+ dest="cpmCutoff",
+ help="counts-per-million cut-off to filter low counts"),
+
+make_option(c("-g", "--gene.selection"),
+ default="pairwise",
+ dest="gene.selection",
+ help="selection of the features in MDSPlot [default: %default]"),
+
+make_option(c("-n", "--normalizationMethod"),
+ default="TMM",
+ dest="normalizationMethod",
+ help="normalization method in calcNormFactors: \"TMM\", \"RLE\" or \"upperquartile\" [default: %default]"),
+
+make_option(c("-C", "--colors"),
+ default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
+ dest="cols",
+ help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]"),
+
+make_option(c("-f", "--forceCairoGraph"),
+ action="store_true",
+ default=FALSE,
+ dest="forceCairoGraph",
+ help="activate cairo type")
+)
+
+# now parse the command line to check which option is given and get associated values
+parser <- OptionParser(usage="usage: %prog [options]",
+ option_list=option_list,
+ description="Compare two or more biological conditions in a RNA-Seq framework with edgeR.",
+ epilogue="For comments, bug reports etc... please contact Hugo Varet ")
+opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options
+
+# get options and arguments
+workDir <- getwd()
+projectName <- opt$projectName # name of the project
+author <- opt$author # author of the statistical analysis/report
+targetFile <- opt$targetFile # path to the design/target file
+rawDir <- opt$rawDir # path to the directory containing raw counts files
+featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example)
+varInt <- opt$varInt # factor of interest
+condRef <- opt$condRef # reference biological condition
+batch <- opt$batch # blocking factor: NULL (default) or "batch" for example
+alpha <- as.numeric(opt$alpha) # threshold of statistical significance
+pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY"
+gene.selection <- opt$gene.selection # selection of the features in MDSPlot
+normalizationMethod <- opt$normalizationMethod # normalization method in calcNormFactors
+cpmCutoff <- opt$cpmCutoff # counts-per-million cut-off to filter low counts
+colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots
+forceCairoGraph <- opt$forceCairoGraph # force cairo as plotting device if enabled
+# print(paste("workDir", workDir))
+# print(paste("projectName", projectName))
+# print(paste("author", author))
+# print(paste("targetFile", targetFile))
+# print(paste("rawDir", rawDir))
+# print(paste("varInt", varInt))
+# print(paste("condRef", condRef))
+# print(paste("batch", batch))
+# print(paste("alpha", alpha))
+# print(paste("pAdjustMethod", pAdjustMethod))
+# print(paste("featuresToRemove", featuresToRemove))
+# print(paste("colors", colors))
+# print(paste("gene.selection", gene.selection))
+# print(paste("normalizationMethod", normalizationMethod))
+# print(paste("cpmCutoff", cpmCutoff))
+
+################################################################################
+### running script ###
+################################################################################
+# setwd(workDir)
+library(SARTools)
+if (forceCairoGraph) options(bitmapType="cairo")
+
+# checking parameters
+problem <- checkParameters.edgeR(projectName=projectName,author=author,targetFile=targetFile,
+ rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt,
+ condRef=condRef,batch=batch,alpha=alpha,pAdjustMethod=pAdjustMethod,
+ cpmCutoff=cpmCutoff,gene.selection=gene.selection,
+ normalizationMethod=normalizationMethod,colors=colors)
+if (problem) quit(save="yes")
+
+# loading target file
+target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch)
+
+# loading counts
+counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove)
+
+# description plots
+majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors)
+
+# edgeR analysis
+out.edgeR <- run.edgeR(counts=counts, target=target, varInt=varInt, condRef=condRef,
+ batch=batch, cpmCutoff=cpmCutoff, normalizationMethod=normalizationMethod,
+ pAdjustMethod=pAdjustMethod)
+
+# MDS + clustering
+exploreCounts(object=out.edgeR$dge, group=target[,varInt], gene.selection=gene.selection, col=colors)
+
+# summary of the analysis (boxplots, dispersions, export table, nDiffTotal, histograms, MA plot)
+summaryResults <- summarizeResults.edgeR(out.edgeR, group=target[,varInt], counts=counts, alpha=alpha, col=colors)
+
+# save image of the R session
+save.image(file=paste0(projectName, ".RData"))
+
+# generating HTML report
+writeReport.edgeR(target=target, counts=counts, out.edgeR=out.edgeR, summaryResults=summaryResults,
+ majSequences=majSequences, workDir=workDir, projectName=projectName, author=author,
+ targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt,
+ condRef=condRef, batch=batch, alpha=alpha, pAdjustMethod=pAdjustMethod, cpmCutoff=cpmCutoff,
+ colors=colors, gene.selection=gene.selection, normalizationMethod=normalizationMethod)