Mercurial > repos > iuc > isoformswitchanalyzer
comparison IsoformSwitchAnalyzeR.R @ 0:f3fefb6d8254 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/isoformswitchanalyzer commit 2c61e4c6151000201dd9a8323722a380bc235380
author | iuc |
---|---|
date | Tue, 24 Jan 2023 18:37:14 +0000 |
parents | |
children | 2c4e879a81cf |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f3fefb6d8254 |
---|---|
1 # Load the IsoformSwitchAnalyzeR library | |
2 library(IsoformSwitchAnalyzeR, | |
3 quietly = TRUE, | |
4 warn.conflicts = FALSE) | |
5 library(argparse, quietly = TRUE, warn.conflicts = FALSE) | |
6 library(dplyr, quietly = TRUE, warn.conflicts = FALSE) | |
7 | |
8 # setup R error handling to go to stderr | |
9 options( | |
10 show.error.messages = FALSE, | |
11 error = function() { | |
12 cat(geterrmessage(), file = stderr()) | |
13 q("no", 1, FALSE) | |
14 } | |
15 ) | |
16 | |
17 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
18 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
19 | |
20 ################################################################################ | |
21 ### Input Processing | |
22 ################################################################################ | |
23 | |
24 | |
25 # Collect arguments from command line | |
26 parser <- ArgumentParser(description = "IsoformSwitcheR R script") | |
27 | |
28 parser$add_argument("--modeSelector") | |
29 parser$add_argument("--parentDir", required = FALSE, help = "Parent directory") | |
30 parser$add_argument("--readLength", | |
31 required = FALSE, | |
32 type = "integer", | |
33 help = "Read length (required for stringtie)") | |
34 parser$add_argument("--annotation", required = FALSE, help = "Annotation") | |
35 parser$add_argument("--transcriptome", required = FALSE, help = "Transcriptome") | |
36 parser$add_argument( | |
37 "--fixStringTieAnnotationProblem", | |
38 action = "store_true", | |
39 required = FALSE, | |
40 help = "Fix StringTie annotation problem" | |
41 ) | |
42 parser$add_argument("--countFiles", required = FALSE, help = "Count files") | |
43 parser$add_argument("--toolSource", required = FALSE, help = "Tool source") | |
44 parser$add_argument("--rObject", required = FALSE, help = "R object") | |
45 parser$add_argument("--IFcutoff", | |
46 required = FALSE, | |
47 type = "numeric", | |
48 help = "IFcutoff") | |
49 parser$add_argument( | |
50 "--geneExpressionCutoff", | |
51 required = FALSE, | |
52 type = "numeric", | |
53 help = "Gene expression cutoff" | |
54 ) | |
55 parser$add_argument( | |
56 "--isoformExpressionCutoff", | |
57 required = FALSE, | |
58 type = "numeric", | |
59 help = "Isoform expression cutoff" | |
60 ) | |
61 parser$add_argument("--alpha", | |
62 required = FALSE, | |
63 type = "numeric", | |
64 help = "") | |
65 parser$add_argument("--dIFcutoff", | |
66 required = FALSE, | |
67 type = "numeric", | |
68 help = "dIF cutoff") | |
69 parser$add_argument( | |
70 "--onlySigIsoforms", | |
71 required = FALSE, | |
72 action = "store_true", | |
73 help = "Only significative isoforms" | |
74 ) | |
75 parser$add_argument( | |
76 "--filterForConsequences", | |
77 required = FALSE, | |
78 action = "store_true", | |
79 help = "Filter for consequences" | |
80 ) | |
81 parser$add_argument( | |
82 "--removeSingleIsformGenes", | |
83 required = FALSE, | |
84 action = "store_true", | |
85 help = "Remove single isoform genes" | |
86 ) | |
87 parser$add_argument( | |
88 "--keepIsoformInAllConditions", | |
89 required = FALSE, | |
90 action = "store_true", | |
91 help = "Keep isoform in all conditions" | |
92 ) | |
93 parser$add_argument( | |
94 "--correctForConfoundingFactors", | |
95 required = FALSE, | |
96 action = "store_true", | |
97 help = "Correct for confunding factors" | |
98 ) | |
99 parser$add_argument( | |
100 "--overwriteIFvalues", | |
101 required = FALSE, | |
102 action = "store_true", | |
103 help = "Overwrite IF values" | |
104 ) | |
105 parser$add_argument( | |
106 "--reduceToSwitchingGenes", | |
107 required = FALSE, | |
108 action = "store_true", | |
109 help = "Reduce to switching genes" | |
110 ) | |
111 parser$add_argument( | |
112 "--reduceFurtherToGenesWithConsequencePotential", | |
113 required = FALSE, | |
114 action = "store_true", | |
115 help = "Reduce further to genes with consequence potential" | |
116 ) | |
117 parser$add_argument( | |
118 "--keepIsoformInAllConditions2", | |
119 required = FALSE, | |
120 action = "store_true", | |
121 help = "Keep isoform in ll conditions" | |
122 ) | |
123 parser$add_argument("--minORFlength", | |
124 required = FALSE, | |
125 type = "integer", | |
126 help = "") | |
127 parser$add_argument("--orfMethod", required = FALSE, help = "ORF methods") | |
128 parser$add_argument("--PTCDistance", | |
129 required = FALSE, | |
130 type = "integer", | |
131 help = "") | |
132 parser$add_argument( | |
133 "--removeShortAAseq", | |
134 required = FALSE, | |
135 action = "store_true", | |
136 help = "Remove short aminoacid sequences" | |
137 ) | |
138 parser$add_argument( | |
139 "--removeLongAAseq", | |
140 required = FALSE, | |
141 action = "store_true", | |
142 help = "Remove long aminoacid sequences" | |
143 ) | |
144 parser$add_argument( | |
145 "--removeORFwithStop", | |
146 required = FALSE, | |
147 action = "store_true", | |
148 help = "Remove ORF with stop codon" | |
149 ) | |
150 parser$add_argument( | |
151 "--onlySwitchingGenes", | |
152 required = FALSE, | |
153 action = "store_true", | |
154 help = "Only switching genes" | |
155 ) | |
156 parser$add_argument("--analysisMode", required = FALSE, help = "Analyze all isoforms with differential usage or single genes") | |
157 parser$add_argument( | |
158 "--genesToPlot", | |
159 type = "integer", | |
160 default = 10, | |
161 required = FALSE, | |
162 help = "Number of genes to plot" | |
163 ) | |
164 parser$add_argument("--gene", required = FALSE, help = "Gene ID to analyze") | |
165 parser$add_argument( | |
166 "--sortByQvals", | |
167 action = "store_true", | |
168 required = FALSE, | |
169 help = "Sort genes by Q-val values" | |
170 ) | |
171 parser$add_argument("--countGenes", | |
172 action = "store_true", | |
173 required = FALSE, | |
174 help = "Count genes") | |
175 parser$add_argument( | |
176 "--asFractionTotal", | |
177 action = "store_true", | |
178 required = FALSE, | |
179 help = "Plot gene expresson as fraction of total" | |
180 ) | |
181 parser$add_argument("--plotGenes", | |
182 action = "store_true", | |
183 required = FALSE, | |
184 help = "Plot genes instead of isoforms") | |
185 parser$add_argument( | |
186 "--simplifyLocation", | |
187 action = "store_true", | |
188 required = FALSE, | |
189 help = "Simplify localtion" | |
190 ) | |
191 parser$add_argument( | |
192 "--removeEmptyConsequences", | |
193 action = "store_true", | |
194 required = FALSE, | |
195 help = "Remove empty consequences" | |
196 ) | |
197 parser$add_argument( | |
198 "--analysisOppositeConsequence", | |
199 action = "store_true", | |
200 required = FALSE, | |
201 help = "Analysi opposite consequences" | |
202 ) | |
203 parser$add_argument("--pathToCPATresultFile", | |
204 required = FALSE, | |
205 help = "Path to CPAT result file") | |
206 parser$add_argument("--pathToCPC2resultFile", | |
207 required = FALSE, | |
208 help = "Path to CPC2 result file") | |
209 parser$add_argument("--pathToPFAMresultFile", | |
210 required = FALSE, | |
211 help = "Path to PFAM result file") | |
212 parser$add_argument("--pathToNetSurfP2resultFile", | |
213 required = FALSE, | |
214 help = "Path to NetSurfP2 result file") | |
215 parser$add_argument("--pathToSignalPresultFile", | |
216 required = FALSE, | |
217 help = "Path to signalP result file") | |
218 parser$add_argument("--pathToIUPred2AresultFile", | |
219 required = FALSE, | |
220 help = "Path to IUPred2A result file") | |
221 parser$add_argument("--codingCutoff", | |
222 required = FALSE, | |
223 type = "numeric", | |
224 help = "Codding cutoff") | |
225 parser$add_argument( | |
226 "--removeNoncodingORFs", | |
227 action = "store_true", | |
228 required = FALSE, | |
229 help = "Remove non-coding ORFs" | |
230 ) | |
231 parser$add_argument( | |
232 "--minSignalPeptideProbability", | |
233 required = FALSE, | |
234 type = "numeric", | |
235 help = "Minimul signal peptide probability" | |
236 ) | |
237 parser$add_argument( | |
238 "--smoothingWindowSize", | |
239 type = "integer", | |
240 required = FALSE, | |
241 help = "Smoothing windows size" | |
242 ) | |
243 parser$add_argument( | |
244 "--probabilityCutoff", | |
245 required = FALSE, | |
246 type = "double", | |
247 help = "Probability cutoff" | |
248 ) | |
249 parser$add_argument("--minIdrSize", | |
250 required = FALSE, | |
251 type = "integer", | |
252 help = "Min Idr size") | |
253 parser$add_argument( | |
254 "--annotateBindingSites", | |
255 action = "store_true", | |
256 required = FALSE, | |
257 help = "Annotate binding sites" | |
258 ) | |
259 parser$add_argument( | |
260 "--minIdrBindingSize", | |
261 required = FALSE, | |
262 type = "integer", | |
263 help = "Minimun Idr binding size" | |
264 ) | |
265 parser$add_argument( | |
266 "--minIdrBindingOverlapFrac", | |
267 required = FALSE, | |
268 type = "numeric", | |
269 help = "" | |
270 ) | |
271 parser$add_argument("--ntCutoff", | |
272 required = FALSE, | |
273 type = "integer", | |
274 help = "Nucleotide cutoff") | |
275 parser$add_argument("--ntFracCutoff", | |
276 required = FALSE, | |
277 type = "numeric", | |
278 help = "Nucleotide fraction cutoff") | |
279 parser$add_argument( | |
280 "--ntJCsimCutoff", | |
281 required = FALSE, | |
282 type = "numeric", | |
283 help = "Nucleotide Jaccard simmilarity cutoff" | |
284 ) | |
285 parser$add_argument("--AaCutoff", | |
286 required = FALSE, | |
287 type = "integer", | |
288 help = "Aminoacid cutoff") | |
289 parser$add_argument("--AaFracCutoff", | |
290 required = FALSE, | |
291 type = "numeric", | |
292 help = "Aminoacid fraction cutoff") | |
293 parser$add_argument( | |
294 "--AaJCsimCutoff", | |
295 required = FALSE, | |
296 type = "numeric", | |
297 help = "Aminoacid Jaccard similarity cutoff" | |
298 ) | |
299 parser$add_argument( | |
300 "--removeNonConseqSwitches", | |
301 action = "store_true", | |
302 required = FALSE, | |
303 help = "Remove switches without consequences" | |
304 ) | |
305 parser$add_argument( | |
306 "--rescaleTranscripts", | |
307 action = "store_true", | |
308 required = FALSE, | |
309 help = "Rescale transcripts" | |
310 ) | |
311 parser$add_argument( | |
312 "--reverseMinus", | |
313 action = "store_true", | |
314 required = FALSE, | |
315 help = "Reverse minus" | |
316 ) | |
317 parser$add_argument( | |
318 "--addErrorbars", | |
319 action = "store_true", | |
320 required = FALSE, | |
321 help = "Add error bars" | |
322 ) | |
323 | |
324 | |
325 args <- parser$parse_args() | |
326 | |
327 # Data import | |
328 ################### | |
329 | |
330 if (args$modeSelector == "data_import") { | |
331 | |
332 quantificationData <- importIsoformExpression( | |
333 parentDir = args$parentDir, | |
334 addIsofomIdAsColumn = TRUE, | |
335 readLength = args$readLength | |
336 ) | |
337 | |
338 ### Make design matrix | |
339 myDesign <- data.frame( | |
340 sampleID = colnames(quantificationData$abundance)[-1], | |
341 condition = gsub( | |
342 "[[:digit:]]+", | |
343 "", | |
344 colnames(quantificationData$abundance)[-1] | |
345 ) | |
346 ) | |
347 | |
348 if (args$toolSource == "stringtie") { | |
349 SwitchList <- importRdata( | |
350 isoformCountMatrix = quantificationData$counts, | |
351 isoformRepExpression = quantificationData$abundance, | |
352 designMatrix = myDesign, | |
353 isoformExonAnnoation = args$annotation, | |
354 isoformNtFasta = args$transcriptome, | |
355 showProgress = TRUE, | |
356 fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem | |
357 ) | |
358 } else { | |
359 SwitchList <- importRdata( | |
360 isoformCountMatrix = quantificationData$counts, | |
361 isoformRepExpression = quantificationData$abundance, | |
362 designMatrix = myDesign, | |
363 isoformExonAnnoation = args$annotation, | |
364 isoformNtFasta = args$transcriptome, | |
365 showProgress = TRUE | |
366 ) | |
367 } | |
368 | |
369 | |
370 geneCountMatrix <- extractGeneExpression( | |
371 SwitchList, | |
372 extractCounts = TRUE, | |
373 addGeneNames = FALSE, | |
374 addIdsAsColumns = FALSE | |
375 ) | |
376 | |
377 if (args$countFiles == "collection") { | |
378 | |
379 expressionDF <- data.frame(geneCountMatrix) | |
380 | |
381 myDesign$condition[length(myDesign$condition)] | |
382 | |
383 dataframe_factor1 <- expressionDF %>% select(matches(myDesign$condition[1])) | |
384 dataframe_factor2 <- expressionDF %>% select(matches(myDesign$condition[length(myDesign$condition)])) | |
385 | |
386 | |
387 lf1 <- as.list(as.data.frame(dataframe_factor1)) | |
388 sampleNames1 <- colnames(as.data.frame(dataframe_factor1)) | |
389 | |
390 lf2 <- as.list(as.data.frame(dataframe_factor2)) | |
391 sampleNames2 <- colnames(as.data.frame(dataframe_factor2)) | |
392 | |
393 geneNames <- row.names(as.data.frame(expressionDF)) | |
394 | |
395 | |
396 for (index in seq_along(lf1)) { | |
397 tabular_expression <- data.frame(geneNames, lf1[index]) | |
398 colnames(tabular_expression) <- | |
399 c("Geneid", sampleNames1[index]) | |
400 filename <- | |
401 paste(sampleNames1[index], "dataset.tabular", sep = "_") | |
402 output_path <- paste("./count_files/factor1/", filename, sep = "") | |
403 write.table( | |
404 tabular_expression, | |
405 output_path, | |
406 sep = "\t", | |
407 row.names = FALSE, | |
408 quote = FALSE | |
409 ) | |
410 } | |
411 for (index in seq_along(lf2)) { | |
412 tabular_expression <- data.frame(geneNames, lf2[index]) | |
413 colnames(tabular_expression) <- | |
414 c("Geneid", sampleNames2[index]) | |
415 filename <- | |
416 paste(sampleNames2[index], "dataset.tabular", sep = "_") | |
417 output_path <- paste("./count_files/factor2/", filename, sep = "") | |
418 write.table( | |
419 tabular_expression, | |
420 output_path, | |
421 sep = "\t", | |
422 row.names = FALSE, | |
423 quote = FALSE | |
424 ) | |
425 } | |
426 } else if (args$countFiles == "matrix") { | |
427 expressionDF <- data.frame(geneCountMatrix) | |
428 geneNames <- row.names(expressionDF) | |
429 | |
430 expressionDF <- cbind(geneNames, expressionDF) | |
431 write.table( | |
432 as.data.frame(expressionDF), | |
433 "./count_files/matrix.tabular", | |
434 sep = "\t", | |
435 row.names = FALSE, | |
436 quote = FALSE | |
437 ) | |
438 write.table( | |
439 as.data.frame(myDesign), | |
440 "./count_files/samples.tabular", | |
441 sep = "\t", | |
442 row.names = FALSE, | |
443 quote = FALSE | |
444 ) | |
445 } | |
446 | |
447 save(SwitchList, file = "SwitchList.Rda") | |
448 | |
449 } | |
450 | |
451 if (args$modeSelector == "first_step") { | |
452 | |
453 # First part of the analysis | |
454 ############################# | |
455 | |
456 load(file = args$rObject) | |
457 | |
458 ### Filter | |
459 SwitchList <- preFilter( | |
460 SwitchList, | |
461 IFcutoff = args$IFcutoff, | |
462 geneExpressionCutoff = args$geneExpressionCutoff, | |
463 isoformExpressionCutoff = args$isoformExpressionCutoff, | |
464 removeSingleIsoformGenes = args$removeSingleIsformGenes, | |
465 onlySigIsoforms = args$onlySigIsoforms, | |
466 keepIsoformInAllConditions = args$keepIsoformInAllConditions, | |
467 alpha = args$alpha, | |
468 dIFcutoff = args$dIFcutoff, | |
469 ) | |
470 | |
471 ### Test for isoform switches | |
472 SwitchList <- isoformSwitchTestDEXSeq( | |
473 SwitchList, | |
474 alpha = args$alpha, | |
475 dIFcutoff = args$dIFcutoff, | |
476 correctForConfoundingFactors = args$correctForConfoundingFactors, | |
477 overwriteIFvalues = args$overwriteIFvalues, | |
478 reduceToSwitchingGenes = args$reduceToSwitchingGenes, | |
479 reduceFurtherToGenesWithConsequencePotential = args$reduceFurtherToGenesWithConsequencePotential, | |
480 onlySigIsoforms = args$onlySigIsoforms, | |
481 keepIsoformInAllConditions = args$keepIsoformInAllConditions2, | |
482 showProgress = TRUE, | |
483 ) | |
484 | |
485 SwitchList <- analyzeNovelIsoformORF( | |
486 SwitchList, | |
487 analysisAllIsoformsWithoutORF = TRUE, | |
488 minORFlength = args$minORFlength, | |
489 orfMethod = args$orfMethod, | |
490 PTCDistance = args$PTCDistance, | |
491 startCodons = "ATG", | |
492 stopCodons = c("TAA", "TAG", "TGA"), | |
493 showProgress = TRUE, | |
494 ) | |
495 | |
496 ### Extract Sequences | |
497 SwitchList <- extractSequence( | |
498 SwitchList, | |
499 onlySwitchingGenes = args$onlySwitchingGenes, | |
500 alpha = args$alpha, | |
501 dIFcutoff = args$dIFcutoff, | |
502 extractNTseq = TRUE, | |
503 extractAAseq = TRUE, | |
504 removeShortAAseq = args$removeShortAAseq, | |
505 removeLongAAseq = args$removeLongAAseq, | |
506 removeORFwithStop = args$removeORFwithStop, | |
507 addToSwitchAnalyzeRlist = TRUE, | |
508 writeToFile = TRUE, | |
509 pathToOutput = getwd(), | |
510 outputPrefix = "isoformSwitchAnalyzeR_isoform", | |
511 forceReExtraction = FALSE, | |
512 quiet = FALSE | |
513 ) | |
514 | |
515 ### Summary | |
516 switchSummary <- extractSwitchSummary( | |
517 SwitchList, | |
518 filterForConsequences = args$filterForConsequences, | |
519 alpha = args$alpha, | |
520 dIFcutoff = args$dIFcutoff, | |
521 onlySigIsoforms = args$onlySigIsoforms, | |
522 ) | |
523 | |
524 save(SwitchList, file = "SwitchList.Rda") | |
525 write.table( | |
526 switchSummary, | |
527 file = "switchSummary.tsv", | |
528 quote = FALSE, | |
529 sep = "\t", | |
530 col.names = TRUE, | |
531 row.names = FALSE | |
532 ) | |
533 | |
534 } | |
535 | |
536 if (args$modeSelector == "second_step") { | |
537 | |
538 # Second part of the analysis | |
539 ############################# | |
540 | |
541 load(file = args$rObject) | |
542 | |
543 ### Add annotation | |
544 if (!is.null(args$pathToCPATresultFile)) { | |
545 SwitchList <- analyzeCPAT( | |
546 SwitchList, | |
547 pathToCPATresultFile = args$pathToCPATresultFile, | |
548 codingCutoff = args$codingCutoff, | |
549 removeNoncodinORFs = args$removeNoncodingORFs | |
550 ) | |
551 } | |
552 | |
553 if (!is.null(args$pathToCPC2resultFile)) { | |
554 SwitchList <- analyzeCPC2( | |
555 SwitchList, | |
556 pathToCPC2resultFile = args$pathToCPC2resultFile, | |
557 removeNoncodinORFs = args$removeNoncodingORFs | |
558 ) | |
559 } | |
560 | |
561 if (!is.null(args$pathToPFAMresultFile)) { | |
562 pfamFiles <- list.files(path = args$pathToPFAMresultFile, | |
563 full.names = TRUE) | |
564 | |
565 SwitchList <- analyzePFAM(SwitchList, | |
566 pathToPFAMresultFile = pfamFiles, | |
567 showProgress = FALSE) | |
568 } | |
569 | |
570 if (!is.null(args$pathToNetSurfP2resultFile)) { | |
571 netsurfFiles <- list.files(path = args$pathToNetSurfP2resultFile, | |
572 full.names = TRUE) | |
573 | |
574 SwitchList <- analyzeNetSurfP2( | |
575 SwitchList, | |
576 pathToNetSurfP2resultFile = netsurfFiles, | |
577 smoothingWindowSize = args$smoothingWindowSize, | |
578 probabilityCutoff = args$probabilityCutoff, | |
579 minIdrSize = args$minIdrSize, | |
580 showProgress = TRUE | |
581 ) | |
582 } | |
583 | |
584 if (!is.null(args$pathToIUPred2AresultFile)) { | |
585 SwitchList <- analyzeIUPred2A( | |
586 SwitchList, | |
587 pathToIUPred2AresultFile = args$pathToIUPred2AresultFile, | |
588 smoothingWindowSize = args$smoothingWindowSize, | |
589 probabilityCutoff = args$probabilityCutoff, | |
590 minIdrSize = args$minIdrSize, | |
591 annotateBindingSites = args$annotateBindingSites, | |
592 minIdrBindingSize = args$minIdrBindingSize, | |
593 minIdrBindingOverlapFrac = args$minIdrBindingOverlapFrac, | |
594 showProgress = TRUE, | |
595 quiet = FALSE | |
596 ) | |
597 } | |
598 | |
599 if (!is.null(args$pathToSignalPresultFile)) { | |
600 signalpFiles <- list.files(path = args$pathToSignalPresultFile, | |
601 full.names = TRUE) | |
602 | |
603 SwitchList <- analyzeSignalP( | |
604 SwitchList, | |
605 pathToSignalPresultFile = signalpFiles, | |
606 minSignalPeptideProbability = args$minSignalPeptideProbability | |
607 ) | |
608 } | |
609 | |
610 SwitchList <- analyzeAlternativeSplicing( | |
611 SwitchList, | |
612 onlySwitchingGenes = args$onlySwitchingGenes, | |
613 alpha = args$alpha, | |
614 dIFcutoff = args$dIFcutoff, | |
615 showProgress = TRUE | |
616 ) | |
617 | |
618 SwitchList <- analyzeIntronRetention( | |
619 SwitchList, | |
620 onlySwitchingGenes = args$onlySwitchingGenes, | |
621 alpha = args$alpha, | |
622 dIFcutoff = args$dIFcutoff, | |
623 showProgress = TRUE | |
624 ) | |
625 | |
626 consequences <- c( | |
627 "intron_retention", | |
628 "NMD_status", | |
629 "isoform_seq_similarity", | |
630 "ORF_genomic", | |
631 "tss", | |
632 "tts" | |
633 ) | |
634 | |
635 if (!is.null(args$pathToCPATresultFile) || | |
636 !is.null(args$pathToCPC2resultFile)) { | |
637 updatedConsequences <- c(consequences, "coding_potential") | |
638 consequences <- updatedConsequences | |
639 } | |
640 | |
641 if (!is.null(args$pathToPFAMresultFile)) { | |
642 updatedConsequences <- c(consequences, "domains_identified") | |
643 consequences <- updatedConsequences | |
644 } | |
645 | |
646 if (!is.null(args$pathToSignalPresultFile)) { | |
647 updatedConsequences <- c(consequences, "signal_peptide_identified") | |
648 consequences <- updatedConsequences | |
649 } | |
650 | |
651 if (!is.null(args$pathToNetSurfP2resultFile) || | |
652 !is.null(args$pathToIUPred2AresultFile)) { | |
653 updatedConsequences <- c(consequences, "IDR_identified", "IDR_type") | |
654 consequences <- updatedConsequences | |
655 } | |
656 | |
657 SwitchList <- analyzeSwitchConsequences( | |
658 SwitchList, | |
659 consequencesToAnalyze = consequences, | |
660 alpha = args$alpha, | |
661 dIFcutoff = args$dIFcutoff, | |
662 onlySigIsoforms = args$onlySigIsoforms, | |
663 ntCutoff = args$ntCutoff, | |
664 ntJCsimCutoff = args$ntJCsimCutoff, | |
665 AaCutoff = args$AaCutoff, | |
666 AaFracCutoff = args$AaFracCutoff, | |
667 AaJCsimCutoff = args$AaJCsimCutoff, | |
668 removeNonConseqSwitches = args$removeNonConseqSwitches, | |
669 showProgress = TRUE | |
670 ) | |
671 | |
672 | |
673 ### Visual analysis | |
674 # Top genes | |
675 | |
676 if (args$analysisMode == "single") { | |
677 | |
678 outputFile <- file.path(getwd(), "single_gene.pdf") | |
679 | |
680 pdf( | |
681 file = outputFile, | |
682 onefile = FALSE, | |
683 height = 6, | |
684 width = 9 | |
685 ) | |
686 | |
687 switchPlot( | |
688 SwitchList, | |
689 gene = args$gene, | |
690 condition1 = myDesign$condition[1], | |
691 condition2 = myDesign$condition[length(myDesign$condition)], | |
692 IFcutoff = args$IFcutoff, | |
693 dIFcutoff = args$dIFcutoff, | |
694 rescaleTranscripts = args$rescaleTranscripts, | |
695 reverseMinus = args$reverseMinus, | |
696 addErrorbars = args$addErrorbars, | |
697 logYaxis = FALSE, | |
698 localTheme = theme_bw(base_size = 8) | |
699 ) | |
700 dev.off() | |
701 | |
702 } else { | |
703 mostSwitchingGene <- | |
704 extractTopSwitches( | |
705 SwitchList, | |
706 n = Inf, | |
707 filterForConsequences = args$filterForConsequences, | |
708 extractGenes = TRUE, | |
709 alpha = args$alpha, | |
710 dIFcutoff = args$dIFcutoff, | |
711 inEachComparison = FALSE, | |
712 sortByQvals = args$sortByQvals | |
713 ) | |
714 | |
715 write.table( | |
716 mostSwitchingGene, | |
717 file = "mostSwitchingGene.tsv", | |
718 quote = FALSE, | |
719 sep = "\t", | |
720 col.names = TRUE, | |
721 row.names = FALSE | |
722 ) | |
723 | |
724 | |
725 switchPlotTopSwitches( | |
726 SwitchList, | |
727 alpha = args$alpha, | |
728 dIFcutoff = args$dIFcutoff, | |
729 onlySigIsoforms = args$onlySigIsoforms, | |
730 n = args$genesToPlot, | |
731 sortByQvals = args$sortByQvals, | |
732 pathToOutput = getwd(), | |
733 fileType = "pdf" | |
734 ) | |
735 | |
736 outputFile <- | |
737 file.path(getwd(), "extractConsequencesSummary.pdf") | |
738 | |
739 pdf( | |
740 file = outputFile, | |
741 onefile = FALSE, | |
742 height = 6, | |
743 width = 9 | |
744 ) | |
745 | |
746 consequenceSummary <- extractConsequenceSummary( | |
747 SwitchList, | |
748 consequencesToAnalyze = "all", | |
749 includeCombined = FALSE, | |
750 asFractionTotal = args$asFractionTotal, | |
751 alpha = args$alpha, | |
752 dIFcutoff = args$dIFcutoff, | |
753 plot = TRUE, | |
754 plotGenes = args$plotGenes, | |
755 simplifyLocation = args$simplifyLocation, | |
756 removeEmptyConsequences = args$removeEmptyConsequences, | |
757 returnResult = TRUE, | |
758 localTheme = theme_bw() | |
759 ) | |
760 dev.off() | |
761 | |
762 write.table( | |
763 consequenceSummary, | |
764 file = "consequencesSummary.tsv", | |
765 quote = FALSE, | |
766 sep = "\t", | |
767 col.names = TRUE, | |
768 row.names = FALSE | |
769 ) | |
770 | |
771 | |
772 outputFile <- file.path(getwd(), "consequencesEnrichment.pdf") | |
773 pdf( | |
774 file = outputFile, | |
775 onefile = FALSE, | |
776 height = 6, | |
777 width = 9 | |
778 ) | |
779 consequenceEnrichment <- extractConsequenceEnrichment( | |
780 SwitchList, | |
781 consequencesToAnalyze = "all", | |
782 alpha = args$alpha, | |
783 dIFcutoff = args$dIFcutoff, | |
784 countGenes = args$countGenes, | |
785 analysisOppositeConsequence = args$analysisOppositeConsequence, | |
786 plot = TRUE, | |
787 localTheme = theme_bw(base_size = 12), | |
788 minEventsForPlotting = 10, | |
789 returnResult = TRUE | |
790 ) | |
791 dev.off() | |
792 | |
793 write.table( | |
794 consequenceEnrichment, | |
795 file = "consequencesEnrichment.tsv", | |
796 quote = FALSE, | |
797 sep = "\t", | |
798 col.names = TRUE, | |
799 row.names = FALSE | |
800 ) | |
801 | |
802 | |
803 outputFile <- file.path(getwd(), "splicingEnrichment.pdf") | |
804 pdf( | |
805 file = outputFile, | |
806 onefile = FALSE, | |
807 height = 6, | |
808 width = 9 | |
809 ) | |
810 splicingEnrichment <- extractSplicingEnrichment( | |
811 SwitchList, | |
812 splicingToAnalyze = "all", | |
813 alpha = args$alpha, | |
814 dIFcutoff = args$dIFcutoff, | |
815 onlySigIsoforms = args$onlySigIsoforms, | |
816 countGenes = args$countGenes, | |
817 plot = TRUE, | |
818 minEventsForPlotting = 10, | |
819 returnResult = TRUE | |
820 ) | |
821 dev.off() | |
822 | |
823 write.table( | |
824 splicingEnrichment, | |
825 file = "splicingEnrichment.tsv", | |
826 quote = FALSE, | |
827 sep = "\t", | |
828 col.names = TRUE, | |
829 row.names = FALSE | |
830 ) | |
831 | |
832 | |
833 outputFile <- file.path(getwd(), "splicingSummary.pdf") | |
834 pdf( | |
835 file = outputFile, | |
836 onefile = FALSE, | |
837 height = 6, | |
838 width = 9 | |
839 ) | |
840 splicingSummary <- extractSplicingSummary( | |
841 SwitchList, | |
842 splicingToAnalyze = "all", | |
843 asFractionTotal = args$asFractionTotal, | |
844 alpha = args$alpha, | |
845 dIFcutoff = args$dIFcutoff, | |
846 onlySigIsoforms = args$onlySigIsoforms, | |
847 plot = TRUE, | |
848 plotGenes = args$plotGenes, | |
849 localTheme = theme_bw(), | |
850 returnResult = TRUE | |
851 ) | |
852 dev.off() | |
853 | |
854 write.table( | |
855 splicingSummary, | |
856 file = "splicingSummary.tsv", | |
857 quote = FALSE, | |
858 sep = "\t", | |
859 col.names = TRUE, | |
860 row.names = FALSE | |
861 ) | |
862 | |
863 | |
864 ### Volcano like plot: | |
865 outputFile <- file.path(getwd(), "volcanoPlot.pdf") | |
866 pdf( | |
867 file = outputFile, | |
868 onefile = FALSE, | |
869 height = 6, | |
870 width = 9 | |
871 ) | |
872 ggplot(data = SwitchList$isoformFeatures, aes(x = dIF, y = -log10(isoform_switch_q_value))) + | |
873 geom_point(aes(color = abs(dIF) > 0.1 & | |
874 isoform_switch_q_value < 0.05), # default cutoff | |
875 size = 1) + | |
876 geom_hline(yintercept = -log10(0.05), linetype = "dashed") + # default cutoff | |
877 geom_vline(xintercept = c(-0.1, 0.1), linetype = "dashed") + # default cutoff | |
878 facet_wrap(~ condition_2) + | |
879 scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) + | |
880 labs(x = "dIF", y = "-Log10 ( Isoform Switch Q Value )") + | |
881 theme_bw() | |
882 dev.off() | |
883 | |
884 | |
885 ### Switch vs Gene changes: | |
886 outputFile <- file.path(getwd(), "switchGene.pdf") | |
887 pdf( | |
888 file = outputFile, | |
889 onefile = FALSE, | |
890 height = 6, | |
891 width = 9 | |
892 ) | |
893 ggplot(data = SwitchList$isoformFeatures, | |
894 aes(x = gene_log2_fold_change, y = dIF)) + | |
895 geom_point(aes(color = abs(dIF) > 0.1 & | |
896 isoform_switch_q_value < 0.05), | |
897 size = 1) + | |
898 facet_wrap(~ condition_2) + | |
899 geom_hline(yintercept = 0, linetype = "dashed") + | |
900 geom_vline(xintercept = 0, linetype = "dashed") + | |
901 scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) + | |
902 labs(x = "Gene log2 fold change", y = "dIF") + | |
903 theme_bw() | |
904 dev.off() | |
905 | |
906 outputFile <- file.path(getwd(), "splicingGenomewide.pdf") | |
907 pdf( | |
908 file = outputFile, | |
909 onefile = FALSE, | |
910 height = 6, | |
911 width = 9 | |
912 ) | |
913 splicingGenomeWide <- extractSplicingGenomeWide( | |
914 SwitchList, | |
915 featureToExtract = "all", | |
916 splicingToAnalyze = c("A3", "MES", "ATSS"), | |
917 plot = TRUE, | |
918 returnResult = TRUE | |
919 ) | |
920 dev.off() | |
921 } | |
922 save(SwitchList, file = "SwitchList.Rda") | |
923 | |
924 } |