annotate template_script_DESeq2_CL.r @ 1:fe0ee346b17d draft

RSEM process files corrected in pre_sartools.py
author lgueguen
date Wed, 26 Apr 2017 05:04:18 -0400
parents 581d217c7337
children de6d0b7c17af
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
1 #!/local/gensoft2/exe/R/3.1.2/bin/Rscript
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
2
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
3 # to run this script, use one of these commands:
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
4 # Rscript --no-save --no-restore --verbose template_script_DESeq2_CL.r -r raw -v group -c T0 > log.txt 2>&1
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
5 # Rscript template_script_DESeq2_CL.r -r raw -v group -c T0
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
6
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
7 # to get help:
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
8 # Rscript template_script_DESeq2_CL.r --help
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
9
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
10 ################################################################################
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
11 ### R script to compare several conditions with the SARTools and DESeq2 packages
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
12 ### Hugo Varet
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
13 ### April 20th, 2015
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
14 ### designed to be executed with SARTools 1.1.0
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
15 ################################################################################
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
16
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
# Start from an empty workspace: drop every object currently defined in the
# session before anything else is created.
rm(list = ls())

# optparse supplies make_option(), OptionParser() and parse_args(), which are
# used to read the command-line options.
library(optparse)
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
19
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
# Command-line options accepted by this script, each with its default value.
# After parsing, every value is available in the option list under the name
# given by `dest`.
option_list <- list(
  # Project name; defaults to the name of the current working directory.
  make_option(
    c("-P", "--projectName"),
    default = basename(getwd()),
    dest = "projectName",
    help = "name of the project used for the report [default: name of the current directory]."
  ),

  # Report author; defaults to the current user. The "user" component of
  # Sys.info() is looked up by name rather than by its position in the vector.
  make_option(
    c("-A", "--author"),
    default = Sys.info()[["user"]],
    dest = "author",
    help = "name of the report author [default: %default]."
  ),

  # Path to the design/target file.
  make_option(
    c("-t", "--targetFile"),
    default = "target.txt",
    dest = "targetFile",
    help = "path to the design/target file [default: %default]."
  ),

  # Directory holding the raw count files.
  make_option(
    c("-r", "--rawDir"),
    default = "raw",
    dest = "rawDir",
    help = "path to the directory containing the HTSeq files [default: %default]."
  ),

  # Comma-separated feature names to drop from the counts; the string is split
  # on "," after parsing.
  make_option(
    c("-F", "--featuresToRemove"),
    default = "alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
    dest = "FTR",
    help = "names of the features to be removed, more than once can be specified [default: %default]"
  ),

  # Factor of interest (a column name of the target file).
  make_option(
    c("-v", "--varInt"),
    default = "group",
    dest = "varInt",
    help = "factor of interest [default: %default]"
  ),

  # Reference level of the factor of interest.
  make_option(
    c("-c", "--condRef"),
    default = "WT",
    dest = "condRef",
    help = "reference biological condition [default: %default]"
  ),

  # Optional blocking factor; NULL means no blocking factor.
  make_option(
    c("-b", "--batch"),
    default = NULL,
    dest = "batch",
    help = "blocking factor [default: %default] or \"batch\" for example"
  ),

  # Mean-variance relationship used for the dispersion fit.
  make_option(
    c("-f", "--fitType"),
    default = "parametric",
    dest = "fitType",
    help = "mean-variance relationship: [default: %default] or local"
  ),

  # Whether to perform the outlier detection.
  make_option(
    c("-o", "--cooksCutoff"),
    default = TRUE,
    dest = "cooksCutoff",
    help = "perform the outliers detection (default is TRUE)"
  ),

  # Whether to perform independent filtering.
  make_option(
    c("-i", "--independentFiltering"),
    default = TRUE,
    dest = "independentFiltering",
    help = "perform independent filtering (default is TRUE)"
  ),

  # Significance threshold.
  make_option(
    c("-a", "--alpha"),
    default = 0.05,
    dest = "alpha",
    help = "threshold of statistical significance [default: %default]"
  ),

  # Multiple-testing correction method.
  make_option(
    c("-p", "--pAdjustMethod"),
    default = "BH",
    dest = "pAdjustMethod",
    help = "p-value adjustment method: \"BH\" or \"BY\" [default: %default]"
  ),

  # Count transformation used for the PCA and the clustering.
  make_option(
    c("-T", "--typeTrans"),
    default = "VST",
    dest = "typeTrans",
    help = "transformation for PCA/clustering: \"VST\" ou \"rlog\" [default: %default]"
  ),

  # Location function used to estimate the size factors.
  make_option(
    c("-l", "--locfunc"),
    default = "median",
    dest = "locfunc",
    help = "median or shorth to estimate the size factors [default: %default]"
  ),

  # Comma-separated plot colors, one per biological condition; the string is
  # split on "," after parsing.
  make_option(
    c("-C", "--colors"),
    default = "dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
    dest = "cols",
    help = "colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]"
  )
)
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
102
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
# Build the parser from the option list, then parse the arguments that follow
# the script name on the command line. No positional argument is accepted
# (positional_arguments = 0), and only the parsed options are kept in `opt`.
parser <- OptionParser(
  usage = "usage: %prog [options]",
  option_list = option_list,
  description = "Compare two or more biological conditions in a RNA-Seq framework with DESeq2.",
  epilogue = "For comments, bug reports etc... please contact Hugo Varet <hugo.varet@pasteur.fr>"
)
opt <- parse_args(
  parser,
  args = commandArgs(trailingOnly = TRUE),
  positional_arguments = 0
)[["options"]]
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
109
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
# Copy each parsed option into its own top-level variable. The working
# directory is taken from the session, not from the command line.
workDir <- getwd()

# Report identity
projectName <- opt[["projectName"]]  # name of the project
author <- opt[["author"]]            # author of the statistical analysis/report

# Input data
targetFile <- opt[["targetFile"]]    # path to the design/target file
rawDir <- opt[["rawDir"]]            # directory containing the raw count files
# Features to remove (HTSeq-count summary rows, rRNA, ...): the option is one
# comma-separated string, split here into a character vector.
featuresToRemove <- strsplit(opt[["FTR"]], ",")[[1]]

# Experimental design
varInt <- opt[["varInt"]]            # factor of interest
condRef <- opt[["condRef"]]          # reference biological condition
batch <- opt[["batch"]]              # blocking factor: NULL (default) or "batch" for example

# Statistical settings
fitType <- opt[["fitType"]]          # mean-variance relationship: "parametric" (default) or "local"
cooksCutoff <- opt[["cooksCutoff"]]  # outlier detection setting (default is TRUE)
independentFiltering <- opt[["independentFiltering"]]  # TRUE/FALSE: perform independent filtering
alpha <- as.numeric(opt[["alpha"]])  # threshold of statistical significance
pAdjustMethod <- opt[["pAdjustMethod"]]  # p-value adjustment method: "BH" (default) or "BY"
typeTrans <- opt[["typeTrans"]]      # transformation for PCA/clustering: "VST" or "rlog"
locfunc <- opt[["locfunc"]]          # "median" (default) or "shorth" to estimate the size factors

# Plot colors, one per biological condition: the option is one comma-separated
# string, split here into a character vector.
colors <- strsplit(opt[["cols"]], ",")[[1]]
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
128
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
129 # print(paste("workDir", workDir))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
130 # print(paste("projectName", projectName))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
131 # print(paste("author", author))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
132 # print(paste("targetFile", targetFile))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
133 # print(paste("rawDir", rawDir))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
134 # print(paste("varInt", varInt))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
135 # print(paste("condRef", condRef))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
136 # print(paste("batch", batch))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
137 # print(paste("fitType", fitType))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
138 # print(paste("cooksCutoff", cooksCutoff))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
139 # print(paste("independentFiltering", independentFiltering))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
140 # print(paste("alpha", alpha))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
141 # print(paste("pAdjustMethod", pAdjustMethod))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
142 # print(paste("typeTrans", typeTrans))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
143 # print(paste("locfunc", locfunc))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
144 # print(paste("featuresToRemove", featuresToRemove))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
145 # print(paste("colors", colors))
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
146
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
147 ################################################################################
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
148 ### running script ###
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
149 ################################################################################
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
150 # setwd(workDir)
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
library(SARTools)

# Validate every parameter before starting the analysis. The returned flag is
# true when at least one parameter is rejected.
problem <- checkParameters.DESeq2(
  projectName = projectName,
  author = author,
  targetFile = targetFile,
  rawDir = rawDir,
  featuresToRemove = featuresToRemove,
  varInt = varInt,
  condRef = condRef,
  batch = batch,
  fitType = fitType,
  cooksCutoff = cooksCutoff,
  independentFiltering = independentFiltering,
  alpha = alpha,
  pAdjustMethod = pAdjustMethod,
  typeTrans = typeTrans,
  locfunc = locfunc,
  colors = colors
)

# Stop here on invalid parameters. The workspace is still saved, as before,
# but the process now exits with a non-zero status so that the calling shell
# or workflow engine sees the failure instead of a successful run.
if (problem) {
  quit(save = "yes", status = 1)
}
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
160
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
# Read the design/target file.
target <- loadTargetFile(
  targetFile = targetFile,
  varInt = varInt,
  condRef = condRef,
  batch = batch
)

# Read the count files listed in the target, dropping the unwanted features.
counts <- loadCountData(
  target = target,
  rawDir = rawDir,
  featuresToRemove = featuresToRemove
)

# Descriptive plots of the raw counts; the result is passed to the report.
majSequences <- descriptionPlots(
  counts = counts,
  group = target[, varInt],
  col = colors
)

# Differential analysis with DESeq2.
out.DESeq2 <- run.DESeq2(
  counts = counts,
  target = target,
  varInt = varInt,
  batch = batch,
  locfunc = locfunc,
  fitType = fitType,
  pAdjustMethod = pAdjustMethod,
  cooksCutoff = cooksCutoff,
  independentFiltering = independentFiltering,
  alpha = alpha
)

# PCA and clustering on the transformed counts.
exploreCounts(
  object = out.DESeq2$dds,
  group = target[, varInt],
  typeTrans = typeTrans,
  col = colors
)

# Summary of the analysis (boxplots, dispersions, diagnostic of the size
# factors, exported tables, number of differential features, histograms,
# MA plot).
summaryResults <- summarizeResults.DESeq2(
  out.DESeq2,
  group = target[, varInt],
  col = colors,
  independentFiltering = independentFiltering,
  cooksCutoff = cooksCutoff,
  alpha = alpha
)
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
182
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
# Save the whole R session to "<projectName>.RData" in the current directory.
save.image(file = sprintf("%s.RData", projectName))

# Generate the HTML report from the results and from every parameter of the
# run.
writeReport.DESeq2(
  target = target,
  counts = counts,
  out.DESeq2 = out.DESeq2,
  summaryResults = summaryResults,
  majSequences = majSequences,
  workDir = workDir,
  projectName = projectName,
  author = author,
  targetFile = targetFile,
  rawDir = rawDir,
  featuresToRemove = featuresToRemove,
  varInt = varInt,
  condRef = condRef,
  batch = batch,
  fitType = fitType,
  cooksCutoff = cooksCutoff,
  independentFiltering = independentFiltering,
  alpha = alpha,
  pAdjustMethod = pAdjustMethod,
  typeTrans = typeTrans,
  locfunc = locfunc,
  colors = colors
)