Mercurial > repos > lgueguen > sartools
changeset 3:de6d0b7c17af draft
release 1.6.3
| author | lgueguen | 
|---|---|
| date | Mon, 01 Oct 2018 05:07:56 -0400 | 
| parents | d86ccac2a660 | 
| children | 05c9b1a7f44e | 
| files | README.md abims_sartools_deseq2.xml abims_sartools_deseq2_wrapper.py abims_sartools_edger.xml abims_sartools_edger_wrapper.py macros.xml pre_sartools.xml template_script_DESeq2_CL.r template_script_edgeR_CL.r | 
| diffstat | 9 files changed, 417 insertions(+), 397 deletions(-) [+] | 
line wrap: on
 line diff
--- a/README.md Wed May 17 05:09:10 2017 -0400 +++ b/README.md Mon Oct 01 05:07:56 2018 -0400 @@ -1,6 +1,6 @@ ------------------------------------------------------------ -SARTools-Galaxy: a galaxy wrapper for SARTools version 1.3.2 ------------------------------------------------------------ +-------------------------------------------------------------------------------------- +SARTools-Galaxy: a galaxy wrapper for SARTools (Statistical Analysis of RNA-Seq Tools) +-------------------------------------------------------------------------------------- [](https://travis-ci.org/PF2-pasteur-fr/SARTools-Galaxy) @@ -12,10 +12,9 @@ Requirements: ------------- - R (3.3.0 or higher), Bio-conductor package - SARTools package (1.3.2) - other R packages: DESeq2 (1.12.0 or higher), edgeR (3.12.0 or higher), genefilter, xtable and knitr - Rscript + These Galaxy tools need: + - R and the following R packages: SARTools, DESeq2, edgeR, genefilter, xtable and knitr. + - Rscript and optparse package SARTools can be downloaded on github (https://github.com/PF2-pasteur-fr/SARTools). More information about installation can be found at this url.
--- a/abims_sartools_deseq2.xml Wed May 17 05:09:10 2017 -0400 +++ b/abims_sartools_deseq2.xml Mon Oct 01 05:07:56 2018 -0400 @@ -26,6 +26,7 @@ --typeTrans $advanced_parameters.typeTrans --locfunc $advanced_parameters.locfunc --colors $advanced_parameters.colors + --forceCairoGraph $advanced_parameters.forceCairoGraph #end if ## ouputs @COMMAND_OUTPUTS@ @@ -45,23 +46,25 @@ <when value="hide" /> <when value="show"> <expand macro="batch_param" /> - <param name="fitType" type="select" label="Mean-variance relationship" help="(-f, --fitType) Type of model for the mean-dispersion relationship. Parametric by default." > + <param type="select" label="Mean-variance relationship" argument="--fitType" help="Type of model for the mean-dispersion relationship. Parametric by default." > <option value="parametric" selected="true">parametric</option> <option value="local">local</option> + <option value="mean">mean</option> </param> - <param name="cooksCutoff" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Perform the outliers detection" help="(-o, --cooksCutoff) Checked by default."/> - <param name="independentFiltering" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Perform independent filtering" help="(-i, --independentFiltering) Checked by default."/> + <param type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Perform the outliers detection" argument="--cooksCutoff" help="Checked by default."/> + <param type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Perform independent filtering" argument="--independentFiltering" help="Checked by default."/> <expand macro="alpha_param" /> <expand macro="padjustmethod_param" /> - <param name="typeTrans" type="select" label="Transformation for PCA/clustering" help="(-T --typeTrans) Method of transformation of the counts for the clustering and the PCA: 'VST' (default) for Variance Stabilizing Transformation, or 'rlog' for Regularized Log 
Transformation." > + <param type="select" label="Transformation for PCA/clustering" argument="--typeTrans" help="Method of transformation of the counts for the clustering and the PCA: 'VST' (default) for Variance Stabilizing Transformation, or 'rlog' for Regularized Log Transformation." > <option value="VST" selected="true">VST</option> <option value="rlog">rlog</option> </param> - <param name="locfunc" type="select" label="Estimation of the size factors" help="(-l --locfunc) 'median' (default) or 'shorth' from the genefilter package." > + <param type="select" label="Estimation of the size factors" argument="--locfunc" help="'median' (default) or 'shorth' from the genefilter package." > <option value="median" selected="true">median</option> <option value="shorth">shorth</option> </param> <expand macro="colors_param" /> + <expand macro="forceCairoGraph_param" /> </when> </conditional> @@ -85,7 +88,7 @@ <output name="log"> <assert_contents> <has_text text="KO vs WT 0.1 171" /> - <has_text text="KO vs WT 2584 2665 5249" /> + <has_text text="KO vs WT 2583 2663 5246" /> <has_text text="HTML report created" /> </assert_contents> </output> @@ -142,6 +145,7 @@ * **typeTrans:** method of transformation of the counts for the clustering and the PCA (default is "VST" for Variance Stabilizing Transformation, or "rlog" for Regularized Log Transformation); * **locfunc:** function used for the estimation of the size factors (default is "median", or "shorth" from the genefilter package); * **colors:** colors used for the figures (one per biological condition), 8 are given by default. + * **forceCairoGraph:** TRUE or FALSE (default) to force the use of cairo with options(bitmapType="cairo"). ------------
--- a/abims_sartools_deseq2_wrapper.py Wed May 17 05:09:10 2017 -0400 +++ b/abims_sartools_deseq2_wrapper.py Mon Oct 01 05:07:56 2018 -0400 @@ -33,6 +33,7 @@ parser.add_argument('--typeTrans') parser.add_argument('--locfunc') parser.add_argument('--colors') + parser.add_argument('--forceCairoGraph') parser.add_argument('--figures_html') parser.add_argument('--figures_html_files_path') parser.add_argument('--tables_html') @@ -57,6 +58,7 @@ typeTrans=args.typeTrans locfunc=args.locfunc colors=args.colors + forceCairoGraph=args.forceCairoGraph figures_html=args.figures_html figures_html_files_path=args.figures_html_files_path tables_html=args.tables_html @@ -99,6 +101,8 @@ cmd+="--locfunc %s " % (locfunc) if colors: cmd+="--colors %s " % (colors) + if forceCairoGraph: + cmd+="--forceCairoGraph %s " % (forceCairoGraph) cmd+="> %s 2>&1" % (log) print("Rscript command: %s") % (cmd) os.system(cmd)
--- a/abims_sartools_edger.xml Wed May 17 05:09:10 2017 -0400 +++ b/abims_sartools_edger.xml Mon Oct 01 05:07:56 2018 -0400 @@ -25,6 +25,7 @@ --geneSelection $advanced_parameters.geneSelection --normalizationMethod $advanced_parameters.normalizationMethod --colors $advanced_parameters.colors + --forceCairoGraph $advanced_parameters.forceCairoGraph #end if ## ouputs @COMMAND_OUTPUTS@ @@ -46,17 +47,18 @@ <expand macro="batch_param" /> <expand macro="alpha_param" /> <expand macro="padjustmethod_param" /> - <param name="cpmCutoff" type="integer" value="1" min="0" label="Counts-per-million cut-off to filter low counts" help="(-m, --cpmCutoff) Set to 0 to disable filtering. Default is 1." /> - <param name="geneSelection" type="select" label="Selection of the features in MDSPlot" help="(-g, --gene.selection) Default is 'pairwise'." > + <param type="integer" value="1" min="0" label="Counts-per-million cut-off to filter low counts" argument="--cpmCutoff" help="Set to 0 to disable filtering. Default is 1." /> + <param name="geneSelection" type="select" label="Selection of the features in MDSPlot" argument="--gene.selection" help="Default is 'pairwise'." > <option value="pairwise" selected="true">pairwise</option> <option value="common">common</option> </param> - <param name="normalizationMethod" type="select" label="Normalization method in calcNormFactors" help="(-n, --normalizationMethod) 'TMM' (default), 'RLE' (DESeq method) or 'upperquartile'." > + <param type="select" label="Normalization method in calcNormFactors" argument="--normalizationMethod" help="'TMM' (default), 'RLE' (DESeq method) or 'upperquartile'." 
> <option value="TMM" selected="true">TMM</option> <option value="RLE">RLE</option> <option value="upperquartile">upperquartile</option> </param> <expand macro="colors_param" /> + <expand macro="forceCairoGraph_param" /> </when> </conditional> @@ -159,6 +161,7 @@ * **gene.selection:** method of selection of the features for the MultiDimensional Scaling plot ("pairwise" by default or common); * **normalizationMethod:** normalization method in calcNormFactors(): "TMM" (default), "RLE" (DESeq method) or "upperquartile"; * **colors:** colors used for the figures (one per biological condition), 8 are given by default. + * **forceCairoGraph:** TRUE or FALSE (default) to force the use of cairo with options(bitmapType="cairo"). ------------
--- a/abims_sartools_edger_wrapper.py Wed May 17 05:09:10 2017 -0400 +++ b/abims_sartools_edger_wrapper.py Mon Oct 01 05:07:56 2018 -0400 @@ -31,6 +31,7 @@ parser.add_argument('--geneSelection') parser.add_argument('--normalizationMethod') parser.add_argument('--colors') + parser.add_argument('--forceCairoGraph') parser.add_argument('--figures_html') parser.add_argument('--figures_html_files_path') parser.add_argument('--tables_html') @@ -53,6 +54,7 @@ geneSelection=args.geneSelection normalizationMethod=args.normalizationMethod colors=args.colors + forceCairoGraph=args.forceCairoGraph figures_html=args.figures_html figures_html_files_path=args.figures_html_files_path tables_html=args.tables_html @@ -91,6 +93,8 @@ cmd+="--normalizationMethod %s " % (normalizationMethod) if colors: cmd+="--colors %s " % (colors) + if forceCairoGraph: + cmd+="--forceCairoGraph %s " % (forceCairoGraph) cmd+="> %s 2>&1" % (log) print("Rscript command: %s") % (cmd) os.system(cmd)
--- a/macros.xml Wed May 17 05:09:10 2017 -0400 +++ b/macros.xml Mon Oct 01 05:07:56 2018 -0400 @@ -1,11 +1,11 @@ <macros> - <token name="@WRAPPER_VERSION@">1.3.2</token> + <token name="@WRAPPER_VERSION@">1.6.3</token> <xml name="requirements"> <requirements> - <requirement type="package" version="1.3.2">r-sartools</requirement> - <requirement type="package" version="1.3.2">r-optparse</requirement> + <requirement type="package" version="1.6.3">r-sartools</requirement> + <requirement type="package" version="1.6.0">r-optparse</requirement> </requirements> </xml> @@ -52,28 +52,28 @@ </token> <macro name="basic_parameters"> - <param name="projectName" type="text" value="Project" label="Name of the project used for the report" help="(-P, --projectName) No space allowed." > + <param type="text" value="Project" label="Name of the project used for the report" argument="--projectName" help="No space allowed." > <validator type="regex" message="Field requires a value. No space allowed.">\S+</validator> </param> - <param name="author" type="text" value="Galaxy" label="Name of the report author" help="(-A, --author) No space allowed." > + <param type="text" value="Galaxy" label="Name of the report author" argument="--author" help="No space allowed." > <validator type="regex" message="Field requires a value. No space allowed.">\S+</validator> </param> - <param name="targetFile" type="data" format="txt" label="Design / target file" help="(-t, --targetFile) See the help section below for details on the required format." /> - <param name="rawDir" type="data" format="no_unzip.zip,zip" label="Zip file containing raw counts files" help="(-r, --rawDir) See the help section below for details on the required format." /> - <param name="featuresToRemove" type="text" size="100" value="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual" label="Names of the features to be removed" help="(-F, --featuresToRemove) Separate the features with a comma, no space allowed. 
More than once can be specified. Specific HTSeq-count information and rRNA for example. Default are 'alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual'." > + <param type="data" format="txt" label="Design / target file" argument="--targetFile" help="See the help section below for details on the required format." /> + <param type="data" format="no_unzip.zip,zip" label="Zip file containing raw counts files" argument="--rawDir" help="See the help section below for details on the required format." /> + <param type="text" size="100" value="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual" label="Names of the features to be removed" argument="--featuresToRemove" help="Separate the features with a comma, no space allowed. More than once can be specified. Specific HTSeq-count information and rRNA for example. Default are 'alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual'." > <validator type="regex" message="Field requires a value. No space allowed.">\S+</validator> </param> - <param name="varInt" type="text" value="group" label="Factor of interest" help="(-v, --varInt) Biological condition in the target file. Default is 'group'." > + <param type="text" value="group" label="Factor of interest" argument="--varInt" help="Biological condition in the target file. Default is 'group'." > <validator type="regex" message="Field requires a value. No space allowed.">\S+</validator> </param> - <param name="condRef" type="text" value="WT" label="Reference biological condition" help="(-c, --condRef) Reference biological condition used to compute fold-changes, must be one of the levels of 'Factor of interest'." > + <param type="text" value="WT" label="Reference biological condition" argument="--condRef" help="Reference biological condition used to compute fold-changes, must be one of the levels of 'Factor of interest'." > <validator type="regex" message="Field requires a value. 
No space allowed.">\S+</validator> </param> </macro> <macro name="batch_param"> <conditional name="batch_condition"> - <param name="condition" type="boolean" checked="false" truevalue="batch" falsevalue="NULL" label="Add a blocking factor" help="(-b, --batch) Adjustment variable to use as a batch effect. Default: unchecked if no batch effect needs to be taken into account."/> + <param name="condition" type="boolean" checked="false" truevalue="batch" falsevalue="NULL" label="Add a blocking factor" argument="--batch" help="Adjustment variable to use as a batch effect. Default: unchecked if no batch effect needs to be taken into account."/> <when value="NULL" /> <when value="batch"> <param name="batch" type="text" value="batch" label="Blocking factor value" help="Must be a column of the target file" > @@ -84,11 +84,11 @@ </macro> <macro name="alpha_param"> - <param name="alpha" type="float" value="0.05" min="0" max="1" label="Threshold of statistical significance" help="(-a, --alpha) Significance threshold applied to the adjusted p-values to select the differentially expressed features. Default is 0.05. The comma is not allowed as decimal separator, use a point instead." /> + <param type="float" value="0.05" min="0" max="1" label="Threshold of statistical significance" argument="--alpha" help="Significance threshold applied to the adjusted p-values to select the differentially expressed features. Default is 0.05. The comma is not allowed as decimal separator, use a point instead." /> </macro> <macro name="padjustmethod_param"> - <param name="pAdjustMethod" type="select" label="p-value adjustment method" help="(-p, --pAdjustMethod) p-value adjustment method for multiple testing. 'BH' by default, 'BY' or any value of p.adjust.methods." > + <param type="select" label="p-value adjustment method" argument="--pAdjustMethod" help="p-value adjustment method for multiple testing. 'BH' by default, 'BY' or any value of p.adjust.methods." 
> <option value="BH" selected="true">BH</option> <option value="BY">BY</option> <option value="bonferroni">bonferroni</option> @@ -100,11 +100,15 @@ </macro> <macro name="colors_param"> - <param name="colors" type="text" size="100" value="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange" label="Colors of each biological condition on the plots: 'col1,col2,col3,col4'" help="(-C, --colors) Separate the colors with a comma, no space allowed. Default are 'dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange'." > + <param type="text" size="100" value="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange" label="Colors of each biological condition on the plots: 'col1,col2,col3,col4'" argument="--colors" help="Separate the colors with a comma, no space allowed. Default are 'dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange'." > <validator type="regex" message="Field requires a value. No space allowed.">\S+</validator> </param> </macro> + <macro name="forceCairoGraph_param"> + <param type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Activate cairo type" argument="--forceCairoGraph" help="Unchecked by default." /> + </macro> + <macro name="outputs"> <data name="report_html" format="html" label="${tool.name} report" /> <data name="tables_html" format="html" label="${tool.name} tables" />
--- a/pre_sartools.xml Wed May 17 05:09:10 2017 -0400 +++ b/pre_sartools.xml Mon Oct 01 05:07:56 2018 -0400 @@ -1,5 +1,8 @@ <tool id="presartools" name="Preprocess files for SARTools" version="0.1.0"> <description>generate design/target file and archive for SARTools inputs</description> + <stdio> + <regex match="WARNING:galaxy.model:Datatype class not found" level="warning"/> + </stdio> <command interpreter="python"> pre_sartools.py --outfile=$outfile
--- a/template_script_DESeq2_CL.r Wed May 17 05:09:10 2017 -0400 +++ b/template_script_DESeq2_CL.r Mon Oct 01 05:07:56 2018 -0400 @@ -1,192 +1,192 @@ -#!/local/gensoft2/exe/R/3.1.2/bin/Rscript - -# to run this script, use one of these commands: -# Rscript --no-save --no-restore --verbose template_script_DESeq2_CL.r -r raw -v group -c T0 > log.txt 2>&1 -# Rscript template_script_DESeq2_CL.r -r raw -v group -c T0 - -# to get help: -# Rscript template_script_DESeq2_CL.r --help - -################################################################################ -### R script to compare several conditions with the SARTools and DESeq2 packages -### Hugo Varet -### April 20th, 2015 -### designed to be executed with SARTools 1.1.0 -################################################################################ - -rm(list=ls()) # remove all the objects from the R session -library(optparse) # to run the script in command lines - -# options list with associated default value. -option_list <- list( -make_option(c("-P", "--projectName"), - default=basename(getwd()), - dest="projectName", - help="name of the project used for the report [default: name of the current directory]."), - -make_option(c("-A", "--author"), - default=Sys.info()[7], - dest="author", - help="name of the report author [default: %default]."), - -make_option(c("-t", "--targetFile"), - default="target.txt", - dest="targetFile", - help="path to the design/target file [default: %default]."), - -make_option(c("-r", "--rawDir"), - default="raw", - dest="rawDir", - help="path to the directory containing the HTSeq files [default: %default]."), - -make_option(c("-F", "--featuresToRemove"), - default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual", - dest="FTR", - help="names of the features to be removed, more than once can be specified [default: %default]"), - -make_option(c("-v", "--varInt"), - default="group", - dest="varInt", - help="factor of interest [default: %default]"), - 
-make_option(c("-c", "--condRef"), - default="WT", - dest="condRef", - help="reference biological condition [default: %default]"), - -make_option(c("-b", "--batch"), - default=NULL, - dest="batch", - help="blocking factor [default: %default] or \"batch\" for example"), - -make_option(c("-f", "--fitType"), - default="parametric", - dest="fitType", - help="mean-variance relationship: [default: %default] or local"), - -make_option(c("-o", "--cooksCutoff"), - default=TRUE, - dest="cooksCutoff", - help="perform the outliers detection (default is TRUE)"), - -make_option(c("-i", "--independentFiltering"), - default=TRUE, - dest="independentFiltering", - help="perform independent filtering (default is TRUE)"), - -make_option(c("-a", "--alpha"), - default=0.05, - dest="alpha", - help="threshold of statistical significance [default: %default]"), - -make_option(c("-p", "--pAdjustMethod"), - default="BH", - dest="pAdjustMethod", - help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"), - -make_option(c("-T", "--typeTrans"), - default="VST", - dest="typeTrans", - help="transformation for PCA/clustering: \"VST\" ou \"rlog\" [default: %default]"), - -make_option(c("-l", "--locfunc"), - default="median", - dest="locfunc", - help="median or shorth to estimate the size factors [default: %default]"), - -make_option(c("-C", "--colors"), - default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange", - dest="cols", - help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]") -) - -# now parse the command line to check which option is given and get associated values -parser <- OptionParser(usage="usage: %prog [options]", - option_list=option_list, - description="Compare two or more biological conditions in a RNA-Seq framework with DESeq2.", - epilogue="For comments, bug reports etc... 
please contact Hugo Varet <hugo.varet@pasteur.fr>") -opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options - -# get options and arguments -workDir <- getwd() -projectName <- opt$projectName # name of the project -author <- opt$author # author of the statistical analysis/report -targetFile <- opt$targetFile # path to the design/target file -rawDir <- opt$rawDir # path to the directory containing raw counts files -featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example) -varInt <- opt$varInt # factor of interest -condRef <- opt$condRef # reference biological condition -batch <- opt$batch # blocking factor: NULL (default) or "batch" for example -fitType <- opt$fitType # mean-variance relationship: "parametric" (default) or "local" -cooksCutoff <- opt$cooksCutoff # outliers detection threshold (NULL to let DESeq2 choosing it) -independentFiltering <- opt$independentFiltering # TRUE/FALSE to perform independent filtering (default is TRUE) -alpha <- as.numeric(opt$alpha) # threshold of statistical significance -pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY" -typeTrans <- opt$typeTrans # transformation for PCA/clustering: "VST" ou "rlog" -locfunc <- opt$locfunc # "median" (default) or "shorth" to estimate the size factors -colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots - -# print(paste("workDir", workDir)) -# print(paste("projectName", projectName)) -# print(paste("author", author)) -# print(paste("targetFile", targetFile)) -# print(paste("rawDir", rawDir)) -# print(paste("varInt", varInt)) -# print(paste("condRef", condRef)) -# print(paste("batch", batch)) -# print(paste("fitType", fitType)) -# print(paste("cooksCutoff", cooksCutoff)) -# print(paste("independentFiltering", independentFiltering)) -# print(paste("alpha", alpha)) -# 
print(paste("pAdjustMethod", pAdjustMethod)) -# print(paste("typeTrans", typeTrans)) -# print(paste("locfunc", locfunc)) -# print(paste("featuresToRemove", featuresToRemove)) -# print(paste("colors", colors)) - -################################################################################ -### running script ### -################################################################################ -# setwd(workDir) -library(SARTools) - -# checking parameters -problem <- checkParameters.DESeq2(projectName=projectName,author=author,targetFile=targetFile, - rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt, - condRef=condRef,batch=batch,fitType=fitType,cooksCutoff=cooksCutoff, - independentFiltering=independentFiltering,alpha=alpha,pAdjustMethod=pAdjustMethod, - typeTrans=typeTrans,locfunc=locfunc,colors=colors) -if (problem) quit(save="yes") - -# loading target file -target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch) - -# loading counts -counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove) - -# description plots -majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors) - -# analysis with DESeq2 -out.DESeq2 <- run.DESeq2(counts=counts, target=target, varInt=varInt, batch=batch, - locfunc=locfunc, fitType=fitType, pAdjustMethod=pAdjustMethod, - cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha) - -# PCA + clustering -exploreCounts(object=out.DESeq2$dds, group=target[,varInt], typeTrans=typeTrans, col=colors) - -# summary of the analysis (boxplots, dispersions, diag size factors, export table, nDiffTotal, histograms, MA plot) -summaryResults <- summarizeResults.DESeq2(out.DESeq2, group=target[,varInt], col=colors, - independentFiltering=independentFiltering, - cooksCutoff=cooksCutoff, alpha=alpha) - -# save image of the R session -save.image(file=paste0(projectName, ".RData")) - -# generating HTML report 
-writeReport.DESeq2(target=target, counts=counts, out.DESeq2=out.DESeq2, summaryResults=summaryResults, - majSequences=majSequences, workDir=workDir, projectName=projectName, author=author, - targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt, - condRef=condRef, batch=batch, fitType=fitType, cooksCutoff=cooksCutoff, - independentFiltering=independentFiltering, alpha=alpha, pAdjustMethod=pAdjustMethod, - typeTrans=typeTrans, locfunc=locfunc, colors=colors) +################################################################################ +### R script to compare several conditions with the SARTools and DESeq2 packages +### Hugo Varet +### March 20th, 2018 +### designed to be executed with SARTools 1.6.3 +### run "Rscript template_script_DESeq2_CL.r --help" to get some help +################################################################################ + +rm(list=ls()) # remove all the objects from the R session +library(optparse) # to run the script in command lines + +# options list with associated default value. 
+option_list <- list( +make_option(c("-P", "--projectName"), + default=basename(getwd()), + dest="projectName", + help="name of the project used for the report [default: name of the current directory]."), + +make_option(c("-A", "--author"), + default=Sys.info()[7], + dest="author", + help="name of the report author [default: %default]."), + +make_option(c("-t", "--targetFile"), + default="target.txt", + dest="targetFile", + help="path to the design/target file [default: %default]."), + +make_option(c("-r", "--rawDir"), + default="raw", + dest="rawDir", + help="path to the directory containing the HTSeq files [default: %default]."), + +make_option(c("-F", "--featuresToRemove"), + default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual", + dest="FTR", + help="names of the features to be removed, more than once can be specified [default: %default]"), + +make_option(c("-v", "--varInt"), + default="group", + dest="varInt", + help="factor of interest [default: %default]"), + +make_option(c("-c", "--condRef"), + default="WT", + dest="condRef", + help="reference biological condition [default: %default]"), + +make_option(c("-b", "--batch"), + default=NULL, + dest="batch", + help="blocking factor [default: %default] or \"batch\" for example"), + +make_option(c("-f", "--fitType"), + default="parametric", + dest="fitType", + help="mean-variance relationship: [default: %default], local or mean"), + +make_option(c("-o", "--cooksCutoff"), + default=TRUE, + dest="cooksCutoff", + help="perform the outliers detection (default is TRUE)"), + +make_option(c("-i", "--independentFiltering"), + default=TRUE, + dest="independentFiltering", + help="perform independent filtering (default is TRUE)"), + +make_option(c("-a", "--alpha"), + default=0.05, + dest="alpha", + help="threshold of statistical significance [default: %default]"), + +make_option(c("-p", "--pAdjustMethod"), + default="BH", + dest="pAdjustMethod", + help="p-value adjustment method: \"BH\" or \"BY\" 
[default: %default]"), + +make_option(c("-T", "--typeTrans"), + default="VST", + dest="typeTrans", + help="transformation for PCA/clustering: \"VST\" ou \"rlog\" [default: %default]"), + +make_option(c("-l", "--locfunc"), + default="median", + dest="locfunc", + help="median or shorth to estimate the size factors [default: %default]"), + +make_option(c("-C", "--colors"), + default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange", + dest="cols", + help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]"), + +make_option(c("-g", "--forceCairoGraph"), + action="store_true", + default=FALSE, + dest="forceCairoGraph", + help="activate cairo type") + +) + +# now parse the command line to check which option is given and get associated values +parser <- OptionParser(usage="usage: %prog [options]", + option_list=option_list, + description="Compare two or more biological conditions in a RNA-Seq framework with DESeq2.", + epilogue="For comments, bug reports etc... 
please contact Hugo Varet <hugo.varet@pasteur.fr>") +opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options + +# get options and arguments +workDir <- getwd() +projectName <- opt$projectName # name of the project +author <- opt$author # author of the statistical analysis/report +targetFile <- opt$targetFile # path to the design/target file +rawDir <- opt$rawDir # path to the directory containing raw counts files +featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example) +varInt <- opt$varInt # factor of interest +condRef <- opt$condRef # reference biological condition +batch <- opt$batch # blocking factor: NULL (default) or "batch" for example +fitType <- opt$fitType # mean-variance relationship: "parametric" (default), "local" or "mean" +cooksCutoff <- opt$cooksCutoff # outliers detection threshold (NULL to let DESeq2 choosing it) +independentFiltering <- opt$independentFiltering # TRUE/FALSE to perform independent filtering (default is TRUE) +alpha <- as.numeric(opt$alpha) # threshold of statistical significance +pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY" +typeTrans <- opt$typeTrans # transformation for PCA/clustering: "VST" ou "rlog" +locfunc <- opt$locfunc # "median" (default) or "shorth" to estimate the size factors +colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots +forceCairoGraph <- opt$forceCairoGraph # force cairo as plotting device if enabled +# print(paste("workDir", workDir)) +# print(paste("projectName", projectName)) +# print(paste("author", author)) +# print(paste("targetFile", targetFile)) +# print(paste("rawDir", rawDir)) +# print(paste("varInt", varInt)) +# print(paste("condRef", condRef)) +# print(paste("batch", batch)) +# print(paste("fitType", fitType)) +# print(paste("cooksCutoff", cooksCutoff)) +# 
print(paste("independentFiltering", independentFiltering)) +# print(paste("alpha", alpha)) +# print(paste("pAdjustMethod", pAdjustMethod)) +# print(paste("typeTrans", typeTrans)) +# print(paste("locfunc", locfunc)) +# print(paste("featuresToRemove", featuresToRemove)) +# print(paste("colors", colors)) + +################################################################################ +### running script ### +################################################################################ +# setwd(workDir) +library(SARTools) +if (forceCairoGraph) options(bitmapType="cairo") + +# checking parameters +problem <- checkParameters.DESeq2(projectName=projectName,author=author,targetFile=targetFile, + rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt, + condRef=condRef,batch=batch,fitType=fitType,cooksCutoff=cooksCutoff, + independentFiltering=independentFiltering,alpha=alpha,pAdjustMethod=pAdjustMethod, + typeTrans=typeTrans,locfunc=locfunc,colors=colors) +if (problem) quit(save="yes") + +# loading target file +target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch) + +# loading counts +counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove) + +# description plots +majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors) + +# analysis with DESeq2 +out.DESeq2 <- run.DESeq2(counts=counts, target=target, varInt=varInt, batch=batch, + locfunc=locfunc, fitType=fitType, pAdjustMethod=pAdjustMethod, + cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha) + +# PCA + clustering +exploreCounts(object=out.DESeq2$dds, group=target[,varInt], typeTrans=typeTrans, col=colors) + +# summary of the analysis (boxplots, dispersions, diag size factors, export table, nDiffTotal, histograms, MA plot) +summaryResults <- summarizeResults.DESeq2(out.DESeq2, group=target[,varInt], col=colors, + independentFiltering=independentFiltering, + cooksCutoff=cooksCutoff, 
alpha=alpha) + +# save image of the R session +save.image(file=paste0(projectName, ".RData")) + +# generating HTML report +writeReport.DESeq2(target=target, counts=counts, out.DESeq2=out.DESeq2, summaryResults=summaryResults, + majSequences=majSequences, workDir=workDir, projectName=projectName, author=author, + targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt, + condRef=condRef, batch=batch, fitType=fitType, cooksCutoff=cooksCutoff, + independentFiltering=independentFiltering, alpha=alpha, pAdjustMethod=pAdjustMethod, + typeTrans=typeTrans, locfunc=locfunc, colors=colors)
--- a/template_script_edgeR_CL.r Wed May 17 05:09:10 2017 -0400 +++ b/template_script_edgeR_CL.r Mon Oct 01 05:07:56 2018 -0400 @@ -1,175 +1,174 @@ -#!/local/gensoft2/exe/R/3.1.2/bin/Rscript - -# to run this script, use one of these commands: -# Rscript --no-save --no-restore --verbose template_script_edgeR_CL.r -r raw -v group -c T0 > log.txt 2>&1 -# Rscript template_script_edgeR_CL.r -r raw -v group -c T0 - -# to get help: -# Rscript template_script_edgeR_CL.r --help - -################################################################################ -### R script to compare several conditions with the SARTools and edgeR packages -### Hugo Varet -### April 20th, 2015 -### designed to be executed with SARTools 1.1.0 -################################################################################ - -rm(list=ls()) # remove all the objects from the R session -library(optparse) # to run the script in command lines - -# options list with associated default value. -option_list <- list( -make_option(c("-P", "--projectName"), - default=basename(getwd()), - dest="projectName", - help="name of the project used for the report [default: name of the current directory]."), - -make_option(c("-A", "--author"), - default=Sys.info()[7], - dest="author", - help="name of the report author [default: %default]."), - -make_option(c("-t", "--targetFile"), - default="target.txt", - dest="targetFile", - help="path to the design/target file [default: %default]."), - -make_option(c("-r", "--rawDir"), - default="raw", - dest="rawDir", - help="path to the directory containing the HTSeq files [default: %default]."), - -make_option(c("-F", "--featuresToRemove"), - default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual", - dest="FTR", - help="names of the features to be removed, more than once can be specified [default: %default]"), - -make_option(c("-v", "--varInt"), - default="group", - dest="varInt", - help="factor of interest [default: %default]"), - -make_option(c("-c", 
"--condRef"), - default="WT", - dest="condRef", - help="reference biological condition [default: %default]"), - -make_option(c("-b", "--batch"), - default=NULL, - dest="batch", - help="blocking factor [default: %default] or \"batch\" for example"), - -make_option(c("-a", "--alpha"), - default=0.05, - dest="alpha", - help="threshold of statistical significance [default: %default]"), - -make_option(c("-p", "--pAdjustMethod"), - default="BH", - dest="pAdjustMethod", - help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"), - -make_option(c("-m", "--cpmCutoff"), - default=1, - dest="cpmCutoff", - help="counts-per-million cut-off to filter low counts"), - -make_option(c("-g", "--gene.selection"), - default="pairwise", - dest="gene.selection", - help="selection of the features in MDSPlot [default: %default]"), - -make_option(c("-n", "--normalizationMethod"), - default="TMM", - dest="normalizationMethod", - help="normalization method in calcNormFactors: \"TMM\", \"RLE\" or \"upperquartile\" [default: %default]"), - -make_option(c("-C", "--colors"), - default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange", - dest="cols", - help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]") -) - -# now parse the command line to check which option is given and get associated values -parser <- OptionParser(usage="usage: %prog [options]", - option_list=option_list, - description="Compare two or more biological conditions in a RNA-Seq framework with edgeR.", - epilogue="For comments, bug reports etc... 
please contact Hugo Varet <hugo.varet@pasteur.fr>") -opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options - -# get options and arguments -workDir <- getwd() -projectName <- opt$projectName # name of the project -author <- opt$author # author of the statistical analysis/report -targetFile <- opt$targetFile # path to the design/target file -rawDir <- opt$rawDir # path to the directory containing raw counts files -featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example) -varInt <- opt$varInt # factor of interest -condRef <- opt$condRef # reference biological condition -batch <- opt$batch # blocking factor: NULL (default) or "batch" for example -alpha <- as.numeric(opt$alpha) # threshold of statistical significance -pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY" -gene.selection <- opt$gene.selection # selection of the features in MDSPlot -normalizationMethod <- opt$normalizationMethod # normalization method in calcNormFactors -cpmCutoff <- opt$cpmCutoff # counts-per-million cut-off to filter low counts -colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots - -# print(paste("workDir", workDir)) -# print(paste("projectName", projectName)) -# print(paste("author", author)) -# print(paste("targetFile", targetFile)) -# print(paste("rawDir", rawDir)) -# print(paste("varInt", varInt)) -# print(paste("condRef", condRef)) -# print(paste("batch", batch)) -# print(paste("alpha", alpha)) -# print(paste("pAdjustMethod", pAdjustMethod)) -# print(paste("featuresToRemove", featuresToRemove)) -# print(paste("colors", colors)) -# print(paste("gene.selection", gene.selection)) -# print(paste("normalizationMethod", normalizationMethod)) -# print(paste("cpmCutoff", cpmCutoff)) - -################################################################################ -### running 
script ### -################################################################################ -# setwd(workDir) -library(SARTools) - -# checking parameters -problem <- checkParameters.edgeR(projectName=projectName,author=author,targetFile=targetFile, - rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt, - condRef=condRef,batch=batch,alpha=alpha,pAdjustMethod=pAdjustMethod, - cpmCutoff=cpmCutoff,gene.selection=gene.selection, - normalizationMethod=normalizationMethod,colors=colors) -if (problem) quit(save="yes") - -# loading target file -target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch) - -# loading counts -counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove) - -# description plots -majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors) - -# edgeR analysis -out.edgeR <- run.edgeR(counts=counts, target=target, varInt=varInt, condRef=condRef, - batch=batch, cpmCutoff=cpmCutoff, normalizationMethod=normalizationMethod, - pAdjustMethod=pAdjustMethod) - -# MDS + clustering -exploreCounts(object=out.edgeR$dge, group=target[,varInt], gene.selection=gene.selection, col=colors) - -# summary of the analysis (boxplots, dispersions, export table, nDiffTotal, histograms, MA plot) -summaryResults <- summarizeResults.edgeR(out.edgeR, group=target[,varInt], counts=counts, alpha=alpha, col=colors) - -# save image of the R session -save.image(file=paste0(projectName, ".RData")) - -# generating HTML report -writeReport.edgeR(target=target, counts=counts, out.edgeR=out.edgeR, summaryResults=summaryResults, - majSequences=majSequences, workDir=workDir, projectName=projectName, author=author, - targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt, - condRef=condRef, batch=batch, alpha=alpha, pAdjustMethod=pAdjustMethod, colors=colors, - gene.selection=gene.selection, normalizationMethod=normalizationMethod) 
+################################################################################
+### R script to compare several conditions with the SARTools and edgeR packages
+### Hugo Varet
+### May 16th, 2018
+### designed to be executed with SARTools 1.6.3
+### run "Rscript template_script_edgeR_CL.r --help" to get some help
+###
+### Outline: parse the command line, check the parameters, load the target file
+### and the counts, run edgeR, explore/summarize the results, save the R session
+### and write the HTML report.
+################################################################################
+
+rm(list=ls())                                        # remove all the objects from the R session
+library(optparse)                                    # command-line parsing (make_option, OptionParser, parse_args)
+
+# options list with associated default value.
+# each value is retrieved further down from the parsed list through its "dest" name.
+option_list <- list(
+make_option(c("-P", "--projectName"),
+            default=basename(getwd()),
+            dest="projectName",
+            help="name of the project used for the report [default: name of the current directory]."),
+
+# the "user" field of Sys.info() is looked up by name rather than by position
+make_option(c("-A", "--author"),
+            default=Sys.info()[["user"]],
+            dest="author",
+            help="name of the report author [default: %default]."),
+
+make_option(c("-t", "--targetFile"),
+            default="target.txt",
+            dest="targetFile",
+            help="path to the design/target file [default: %default]."),
+
+make_option(c("-r", "--rawDir"),
+            default="raw",
+            dest="rawDir",
+            help="path to the directory containing the HTSeq files [default: %default]."),
+
+# comma-separated list, split into a character vector after parsing
+make_option(c("-F", "--featuresToRemove"),
+            default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
+            dest="FTR",
+            help="names of the features to be removed, more than once can be specified [default: %default]"),
+
+make_option(c("-v", "--varInt"),
+            default="group",
+            dest="varInt",
+            help="factor of interest [default: %default]"),
+
+make_option(c("-c", "--condRef"),
+            default="WT",
+            dest="condRef",
+            help="reference biological condition [default: %default]"),
+
+make_option(c("-b", "--batch"),
+            default=NULL,
+            dest="batch",
+            help="blocking factor [default: %default] or \"batch\" for example"),
+
+make_option(c("-a", "--alpha"),
+            default=0.05,
+            dest="alpha",
+            help="threshold of statistical significance [default: %default]"),
+
+make_option(c("-p", "--pAdjustMethod"),
+            default="BH",
+            dest="pAdjustMethod",
+            help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"),
+
+make_option(c("-m", "--cpmCutoff"),
+            default=1,
+            dest="cpmCutoff",
+            help="counts-per-million cut-off to filter low counts"),
+
+make_option(c("-g", "--gene.selection"),
+            default="pairwise",
+            dest="gene.selection",
+            help="selection of the features in MDSPlot [default: %default]"),
+
+make_option(c("-n", "--normalizationMethod"),
+            default="TMM",
+            dest="normalizationMethod",
+            help="normalization method in calcNormFactors: \"TMM\", \"RLE\" or \"upperquartile\" [default: %default]"),
+
+# comma-separated list, split into a character vector after parsing
+make_option(c("-C", "--colors"),
+            default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
+            dest="cols",
+            help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]"),
+
+# flag without value: TRUE when present on the command line, FALSE otherwise
+make_option(c("-f", "--forceCairoGraph"),
+            action="store_true",
+            default=FALSE,
+            dest="forceCairoGraph",
+            help="activate cairo type")
+)
+
+# now parse the command line to check which option is given and get associated values
+parser <- OptionParser(usage="usage: %prog [options]",
+                       option_list=option_list,
+                       description="Compare two or more biological conditions in a RNA-Seq framework with edgeR.",
+                       epilogue="For comments, bug reports etc... please contact Hugo Varet <hugo.varet@pasteur.fr>")
+opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options
+
+# get options and arguments
+workDir <- getwd()
+projectName <- opt$projectName                       # name of the project
+author <- opt$author                                 # author of the statistical analysis/report
+targetFile <- opt$targetFile                         # path to the design/target file
+rawDir <- opt$rawDir                                 # path to the directory containing raw counts files
+featuresToRemove <- unlist(strsplit(opt$FTR, ","))   # names of the features to be removed (specific HTSeq-count information and rRNA for example)
+varInt <- opt$varInt                                 # factor of interest
+condRef <- opt$condRef                               # reference biological condition
+batch <- opt$batch                                   # blocking factor: NULL (default) or "batch" for example
+alpha <- as.numeric(opt$alpha)                       # threshold of statistical significance
+pAdjustMethod <- opt$pAdjustMethod                   # p-value adjustment method: "BH" (default) or "BY"
+gene.selection <- opt$gene.selection                 # selection of the features in MDSPlot
+normalizationMethod <- opt$normalizationMethod       # normalization method in calcNormFactors
+cpmCutoff <- opt$cpmCutoff                           # counts-per-million cut-off to filter low counts
+colors <- unlist(strsplit(opt$cols, ","))            # vector of colors of each biological condition on the plots
+forceCairoGraph <- opt$forceCairoGraph               # force cairo as plotting device if enabled
+
+# debugging helpers: uncomment to print the parameters actually used
+# print(paste("workDir", workDir))
+# print(paste("projectName", projectName))
+# print(paste("author", author))
+# print(paste("targetFile", targetFile))
+# print(paste("rawDir", rawDir))
+# print(paste("varInt", varInt))
+# print(paste("condRef", condRef))
+# print(paste("batch", batch))
+# print(paste("alpha", alpha))
+# print(paste("pAdjustMethod", pAdjustMethod))
+# print(paste("featuresToRemove", featuresToRemove))
+# print(paste("colors", colors))
+# print(paste("gene.selection", gene.selection))
+# print(paste("normalizationMethod", normalizationMethod))
+# print(paste("cpmCutoff", cpmCutoff))
+
+################################################################################
+###                             running script                               ###
+################################################################################
+# setwd(workDir)
+library(SARTools)
+# switch the bitmap device type to cairo only when the flag was explicitly given
+if (isTRUE(forceCairoGraph)) options(bitmapType="cairo")
+
+# checking parameters
+problem <- checkParameters.edgeR(projectName=projectName,author=author,targetFile=targetFile,
+                                 rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt,
+                                 condRef=condRef,batch=batch,alpha=alpha,pAdjustMethod=pAdjustMethod,
+                                 cpmCutoff=cpmCutoff,gene.selection=gene.selection,
+                                 normalizationMethod=normalizationMethod,colors=colors)
+# when the check reports a problem, stop with a non-zero exit status so that the
+# caller (shell, wrapper, workflow engine) can detect the failure; the session is
+# still saved, as before
+if (problem) quit(save="yes", status=1)
+
+# loading target file
+target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch)
+
+# loading counts
+counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove)
+
+# description plots
+majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors)
+
+# edgeR analysis
+out.edgeR <- run.edgeR(counts=counts, target=target, varInt=varInt, condRef=condRef,
+                       batch=batch, cpmCutoff=cpmCutoff, normalizationMethod=normalizationMethod,
+                       pAdjustMethod=pAdjustMethod)
+
+# MDS + clustering
+exploreCounts(object=out.edgeR$dge, group=target[,varInt], gene.selection=gene.selection, col=colors)
+
+# summary of the analysis (boxplots, dispersions, export table, nDiffTotal, histograms, MA plot)
+summaryResults <- summarizeResults.edgeR(out.edgeR, group=target[,varInt], counts=counts, alpha=alpha, col=colors)
+
+# save image of the R session
+save.image(file=paste0(projectName, ".RData"))
+
+# generating HTML report
+writeReport.edgeR(target=target, counts=counts, out.edgeR=out.edgeR, summaryResults=summaryResults,
+                  majSequences=majSequences, workDir=workDir, projectName=projectName, author=author,
+                  targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt,
+                  condRef=condRef, batch=batch, alpha=alpha, pAdjustMethod=pAdjustMethod, cpmCutoff=cpmCutoff,
+                  colors=colors, gene.selection=gene.selection, normalizationMethod=normalizationMethod)
