comparison IsoformSwitchAnalyzeR.R @ 0:f3fefb6d8254 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/isoformswitchanalyzer commit 2c61e4c6151000201dd9a8323722a380bc235380
author iuc
date Tue, 24 Jan 2023 18:37:14 +0000
parents
children 2c4e879a81cf
comparison
equal deleted inserted replaced
-1:000000000000 0:f3fefb6d8254
1 # Load the IsoformSwitchAnalyzeR library
2 library(IsoformSwitchAnalyzeR,
3 quietly = TRUE,
4 warn.conflicts = FALSE)
5 library(argparse, quietly = TRUE, warn.conflicts = FALSE)
6 library(dplyr, quietly = TRUE, warn.conflicts = FALSE)
7
8 # setup R error handling to go to stderr
9 options(
10 show.error.messages = FALSE,
11 error = function() {
12 cat(geterrmessage(), file = stderr())
13 q("no", 1, FALSE)
14 }
15 )
16
17 # we need that to not crash galaxy with an UTF8 error on German LC settings.
18 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
19
20 ################################################################################
21 ### Input Processing
22 ################################################################################
23
24
25 # Collect arguments from command line
26 parser <- ArgumentParser(description = "IsoformSwitcheR R script")
27
28 parser$add_argument("--modeSelector")
29 parser$add_argument("--parentDir", required = FALSE, help = "Parent directory")
30 parser$add_argument("--readLength",
31 required = FALSE,
32 type = "integer",
33 help = "Read length (required for stringtie)")
34 parser$add_argument("--annotation", required = FALSE, help = "Annotation")
35 parser$add_argument("--transcriptome", required = FALSE, help = "Transcriptome")
36 parser$add_argument(
37 "--fixStringTieAnnotationProblem",
38 action = "store_true",
39 required = FALSE,
40 help = "Fix StringTie annotation problem"
41 )
42 parser$add_argument("--countFiles", required = FALSE, help = "Count files")
43 parser$add_argument("--toolSource", required = FALSE, help = "Tool source")
44 parser$add_argument("--rObject", required = FALSE, help = "R object")
45 parser$add_argument("--IFcutoff",
46 required = FALSE,
47 type = "numeric",
48 help = "IFcutoff")
49 parser$add_argument(
50 "--geneExpressionCutoff",
51 required = FALSE,
52 type = "numeric",
53 help = "Gene expression cutoff"
54 )
55 parser$add_argument(
56 "--isoformExpressionCutoff",
57 required = FALSE,
58 type = "numeric",
59 help = "Isoform expression cutoff"
60 )
61 parser$add_argument("--alpha",
62 required = FALSE,
63 type = "numeric",
64 help = "")
65 parser$add_argument("--dIFcutoff",
66 required = FALSE,
67 type = "numeric",
68 help = "dIF cutoff")
69 parser$add_argument(
70 "--onlySigIsoforms",
71 required = FALSE,
72 action = "store_true",
73 help = "Only significative isoforms"
74 )
75 parser$add_argument(
76 "--filterForConsequences",
77 required = FALSE,
78 action = "store_true",
79 help = "Filter for consequences"
80 )
81 parser$add_argument(
82 "--removeSingleIsformGenes",
83 required = FALSE,
84 action = "store_true",
85 help = "Remove single isoform genes"
86 )
87 parser$add_argument(
88 "--keepIsoformInAllConditions",
89 required = FALSE,
90 action = "store_true",
91 help = "Keep isoform in all conditions"
92 )
93 parser$add_argument(
94 "--correctForConfoundingFactors",
95 required = FALSE,
96 action = "store_true",
97 help = "Correct for confunding factors"
98 )
99 parser$add_argument(
100 "--overwriteIFvalues",
101 required = FALSE,
102 action = "store_true",
103 help = "Overwrite IF values"
104 )
105 parser$add_argument(
106 "--reduceToSwitchingGenes",
107 required = FALSE,
108 action = "store_true",
109 help = "Reduce to switching genes"
110 )
111 parser$add_argument(
112 "--reduceFurtherToGenesWithConsequencePotential",
113 required = FALSE,
114 action = "store_true",
115 help = "Reduce further to genes with consequence potential"
116 )
117 parser$add_argument(
118 "--keepIsoformInAllConditions2",
119 required = FALSE,
120 action = "store_true",
121 help = "Keep isoform in ll conditions"
122 )
123 parser$add_argument("--minORFlength",
124 required = FALSE,
125 type = "integer",
126 help = "")
127 parser$add_argument("--orfMethod", required = FALSE, help = "ORF methods")
128 parser$add_argument("--PTCDistance",
129 required = FALSE,
130 type = "integer",
131 help = "")
132 parser$add_argument(
133 "--removeShortAAseq",
134 required = FALSE,
135 action = "store_true",
136 help = "Remove short aminoacid sequences"
137 )
138 parser$add_argument(
139 "--removeLongAAseq",
140 required = FALSE,
141 action = "store_true",
142 help = "Remove long aminoacid sequences"
143 )
144 parser$add_argument(
145 "--removeORFwithStop",
146 required = FALSE,
147 action = "store_true",
148 help = "Remove ORF with stop codon"
149 )
150 parser$add_argument(
151 "--onlySwitchingGenes",
152 required = FALSE,
153 action = "store_true",
154 help = "Only switching genes"
155 )
156 parser$add_argument("--analysisMode", required = FALSE, help = "Analyze all isoforms with differential usage or single genes")
157 parser$add_argument(
158 "--genesToPlot",
159 type = "integer",
160 default = 10,
161 required = FALSE,
162 help = "Number of genes to plot"
163 )
164 parser$add_argument("--gene", required = FALSE, help = "Gene ID to analyze")
165 parser$add_argument(
166 "--sortByQvals",
167 action = "store_true",
168 required = FALSE,
169 help = "Sort genes by Q-val values"
170 )
171 parser$add_argument("--countGenes",
172 action = "store_true",
173 required = FALSE,
174 help = "Count genes")
175 parser$add_argument(
176 "--asFractionTotal",
177 action = "store_true",
178 required = FALSE,
179 help = "Plot gene expresson as fraction of total"
180 )
181 parser$add_argument("--plotGenes",
182 action = "store_true",
183 required = FALSE,
184 help = "Plot genes instead of isoforms")
185 parser$add_argument(
186 "--simplifyLocation",
187 action = "store_true",
188 required = FALSE,
189 help = "Simplify localtion"
190 )
191 parser$add_argument(
192 "--removeEmptyConsequences",
193 action = "store_true",
194 required = FALSE,
195 help = "Remove empty consequences"
196 )
197 parser$add_argument(
198 "--analysisOppositeConsequence",
199 action = "store_true",
200 required = FALSE,
201 help = "Analysi opposite consequences"
202 )
203 parser$add_argument("--pathToCPATresultFile",
204 required = FALSE,
205 help = "Path to CPAT result file")
206 parser$add_argument("--pathToCPC2resultFile",
207 required = FALSE,
208 help = "Path to CPC2 result file")
209 parser$add_argument("--pathToPFAMresultFile",
210 required = FALSE,
211 help = "Path to PFAM result file")
212 parser$add_argument("--pathToNetSurfP2resultFile",
213 required = FALSE,
214 help = "Path to NetSurfP2 result file")
215 parser$add_argument("--pathToSignalPresultFile",
216 required = FALSE,
217 help = "Path to signalP result file")
218 parser$add_argument("--pathToIUPred2AresultFile",
219 required = FALSE,
220 help = "Path to IUPred2A result file")
221 parser$add_argument("--codingCutoff",
222 required = FALSE,
223 type = "numeric",
224 help = "Codding cutoff")
225 parser$add_argument(
226 "--removeNoncodingORFs",
227 action = "store_true",
228 required = FALSE,
229 help = "Remove non-coding ORFs"
230 )
231 parser$add_argument(
232 "--minSignalPeptideProbability",
233 required = FALSE,
234 type = "numeric",
235 help = "Minimul signal peptide probability"
236 )
237 parser$add_argument(
238 "--smoothingWindowSize",
239 type = "integer",
240 required = FALSE,
241 help = "Smoothing windows size"
242 )
243 parser$add_argument(
244 "--probabilityCutoff",
245 required = FALSE,
246 type = "double",
247 help = "Probability cutoff"
248 )
249 parser$add_argument("--minIdrSize",
250 required = FALSE,
251 type = "integer",
252 help = "Min Idr size")
253 parser$add_argument(
254 "--annotateBindingSites",
255 action = "store_true",
256 required = FALSE,
257 help = "Annotate binding sites"
258 )
259 parser$add_argument(
260 "--minIdrBindingSize",
261 required = FALSE,
262 type = "integer",
263 help = "Minimun Idr binding size"
264 )
265 parser$add_argument(
266 "--minIdrBindingOverlapFrac",
267 required = FALSE,
268 type = "numeric",
269 help = ""
270 )
271 parser$add_argument("--ntCutoff",
272 required = FALSE,
273 type = "integer",
274 help = "Nucleotide cutoff")
275 parser$add_argument("--ntFracCutoff",
276 required = FALSE,
277 type = "numeric",
278 help = "Nucleotide fraction cutoff")
279 parser$add_argument(
280 "--ntJCsimCutoff",
281 required = FALSE,
282 type = "numeric",
283 help = "Nucleotide Jaccard simmilarity cutoff"
284 )
285 parser$add_argument("--AaCutoff",
286 required = FALSE,
287 type = "integer",
288 help = "Aminoacid cutoff")
289 parser$add_argument("--AaFracCutoff",
290 required = FALSE,
291 type = "numeric",
292 help = "Aminoacid fraction cutoff")
293 parser$add_argument(
294 "--AaJCsimCutoff",
295 required = FALSE,
296 type = "numeric",
297 help = "Aminoacid Jaccard similarity cutoff"
298 )
299 parser$add_argument(
300 "--removeNonConseqSwitches",
301 action = "store_true",
302 required = FALSE,
303 help = "Remove switches without consequences"
304 )
305 parser$add_argument(
306 "--rescaleTranscripts",
307 action = "store_true",
308 required = FALSE,
309 help = "Rescale transcripts"
310 )
311 parser$add_argument(
312 "--reverseMinus",
313 action = "store_true",
314 required = FALSE,
315 help = "Reverse minus"
316 )
317 parser$add_argument(
318 "--addErrorbars",
319 action = "store_true",
320 required = FALSE,
321 help = "Add error bars"
322 )
323
324
325 args <- parser$parse_args()
326
327 # Data import
328 ###################
329
330 if (args$modeSelector == "data_import") {
331
332 quantificationData <- importIsoformExpression(
333 parentDir = args$parentDir,
334 addIsofomIdAsColumn = TRUE,
335 readLength = args$readLength
336 )
337
338 ### Make design matrix
339 myDesign <- data.frame(
340 sampleID = colnames(quantificationData$abundance)[-1],
341 condition = gsub(
342 "[[:digit:]]+",
343 "",
344 colnames(quantificationData$abundance)[-1]
345 )
346 )
347
348 if (args$toolSource == "stringtie") {
349 SwitchList <- importRdata(
350 isoformCountMatrix = quantificationData$counts,
351 isoformRepExpression = quantificationData$abundance,
352 designMatrix = myDesign,
353 isoformExonAnnoation = args$annotation,
354 isoformNtFasta = args$transcriptome,
355 showProgress = TRUE,
356 fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem
357 )
358 } else {
359 SwitchList <- importRdata(
360 isoformCountMatrix = quantificationData$counts,
361 isoformRepExpression = quantificationData$abundance,
362 designMatrix = myDesign,
363 isoformExonAnnoation = args$annotation,
364 isoformNtFasta = args$transcriptome,
365 showProgress = TRUE
366 )
367 }
368
369
370 geneCountMatrix <- extractGeneExpression(
371 SwitchList,
372 extractCounts = TRUE,
373 addGeneNames = FALSE,
374 addIdsAsColumns = FALSE
375 )
376
377 if (args$countFiles == "collection") {
378
379 expressionDF <- data.frame(geneCountMatrix)
380
381 myDesign$condition[length(myDesign$condition)]
382
383 dataframe_factor1 <- expressionDF %>% select(matches(myDesign$condition[1]))
384 dataframe_factor2 <- expressionDF %>% select(matches(myDesign$condition[length(myDesign$condition)]))
385
386
387 lf1 <- as.list(as.data.frame(dataframe_factor1))
388 sampleNames1 <- colnames(as.data.frame(dataframe_factor1))
389
390 lf2 <- as.list(as.data.frame(dataframe_factor2))
391 sampleNames2 <- colnames(as.data.frame(dataframe_factor2))
392
393 geneNames <- row.names(as.data.frame(expressionDF))
394
395
396 for (index in seq_along(lf1)) {
397 tabular_expression <- data.frame(geneNames, lf1[index])
398 colnames(tabular_expression) <-
399 c("Geneid", sampleNames1[index])
400 filename <-
401 paste(sampleNames1[index], "dataset.tabular", sep = "_")
402 output_path <- paste("./count_files/factor1/", filename, sep = "")
403 write.table(
404 tabular_expression,
405 output_path,
406 sep = "\t",
407 row.names = FALSE,
408 quote = FALSE
409 )
410 }
411 for (index in seq_along(lf2)) {
412 tabular_expression <- data.frame(geneNames, lf2[index])
413 colnames(tabular_expression) <-
414 c("Geneid", sampleNames2[index])
415 filename <-
416 paste(sampleNames2[index], "dataset.tabular", sep = "_")
417 output_path <- paste("./count_files/factor2/", filename, sep = "")
418 write.table(
419 tabular_expression,
420 output_path,
421 sep = "\t",
422 row.names = FALSE,
423 quote = FALSE
424 )
425 }
426 } else if (args$countFiles == "matrix") {
427 expressionDF <- data.frame(geneCountMatrix)
428 geneNames <- row.names(expressionDF)
429
430 expressionDF <- cbind(geneNames, expressionDF)
431 write.table(
432 as.data.frame(expressionDF),
433 "./count_files/matrix.tabular",
434 sep = "\t",
435 row.names = FALSE,
436 quote = FALSE
437 )
438 write.table(
439 as.data.frame(myDesign),
440 "./count_files/samples.tabular",
441 sep = "\t",
442 row.names = FALSE,
443 quote = FALSE
444 )
445 }
446
447 save(SwitchList, file = "SwitchList.Rda")
448
449 }
450
451 if (args$modeSelector == "first_step") {
452
453 # First part of the analysis
454 #############################
455
456 load(file = args$rObject)
457
458 ### Filter
459 SwitchList <- preFilter(
460 SwitchList,
461 IFcutoff = args$IFcutoff,
462 geneExpressionCutoff = args$geneExpressionCutoff,
463 isoformExpressionCutoff = args$isoformExpressionCutoff,
464 removeSingleIsoformGenes = args$removeSingleIsformGenes,
465 onlySigIsoforms = args$onlySigIsoforms,
466 keepIsoformInAllConditions = args$keepIsoformInAllConditions,
467 alpha = args$alpha,
468 dIFcutoff = args$dIFcutoff,
469 )
470
471 ### Test for isoform switches
472 SwitchList <- isoformSwitchTestDEXSeq(
473 SwitchList,
474 alpha = args$alpha,
475 dIFcutoff = args$dIFcutoff,
476 correctForConfoundingFactors = args$correctForConfoundingFactors,
477 overwriteIFvalues = args$overwriteIFvalues,
478 reduceToSwitchingGenes = args$reduceToSwitchingGenes,
479 reduceFurtherToGenesWithConsequencePotential = args$reduceFurtherToGenesWithConsequencePotential,
480 onlySigIsoforms = args$onlySigIsoforms,
481 keepIsoformInAllConditions = args$keepIsoformInAllConditions2,
482 showProgress = TRUE,
483 )
484
485 SwitchList <- analyzeNovelIsoformORF(
486 SwitchList,
487 analysisAllIsoformsWithoutORF = TRUE,
488 minORFlength = args$minORFlength,
489 orfMethod = args$orfMethod,
490 PTCDistance = args$PTCDistance,
491 startCodons = "ATG",
492 stopCodons = c("TAA", "TAG", "TGA"),
493 showProgress = TRUE,
494 )
495
496 ### Extract Sequences
497 SwitchList <- extractSequence(
498 SwitchList,
499 onlySwitchingGenes = args$onlySwitchingGenes,
500 alpha = args$alpha,
501 dIFcutoff = args$dIFcutoff,
502 extractNTseq = TRUE,
503 extractAAseq = TRUE,
504 removeShortAAseq = args$removeShortAAseq,
505 removeLongAAseq = args$removeLongAAseq,
506 removeORFwithStop = args$removeORFwithStop,
507 addToSwitchAnalyzeRlist = TRUE,
508 writeToFile = TRUE,
509 pathToOutput = getwd(),
510 outputPrefix = "isoformSwitchAnalyzeR_isoform",
511 forceReExtraction = FALSE,
512 quiet = FALSE
513 )
514
515 ### Summary
516 switchSummary <- extractSwitchSummary(
517 SwitchList,
518 filterForConsequences = args$filterForConsequences,
519 alpha = args$alpha,
520 dIFcutoff = args$dIFcutoff,
521 onlySigIsoforms = args$onlySigIsoforms,
522 )
523
524 save(SwitchList, file = "SwitchList.Rda")
525 write.table(
526 switchSummary,
527 file = "switchSummary.tsv",
528 quote = FALSE,
529 sep = "\t",
530 col.names = TRUE,
531 row.names = FALSE
532 )
533
534 }
535
536 if (args$modeSelector == "second_step") {
537
538 # Second part of the analysis
539 #############################
540
541 load(file = args$rObject)
542
543 ### Add annotation
544 if (!is.null(args$pathToCPATresultFile)) {
545 SwitchList <- analyzeCPAT(
546 SwitchList,
547 pathToCPATresultFile = args$pathToCPATresultFile,
548 codingCutoff = args$codingCutoff,
549 removeNoncodinORFs = args$removeNoncodingORFs
550 )
551 }
552
553 if (!is.null(args$pathToCPC2resultFile)) {
554 SwitchList <- analyzeCPC2(
555 SwitchList,
556 pathToCPC2resultFile = args$pathToCPC2resultFile,
557 removeNoncodinORFs = args$removeNoncodingORFs
558 )
559 }
560
561 if (!is.null(args$pathToPFAMresultFile)) {
562 pfamFiles <- list.files(path = args$pathToPFAMresultFile,
563 full.names = TRUE)
564
565 SwitchList <- analyzePFAM(SwitchList,
566 pathToPFAMresultFile = pfamFiles,
567 showProgress = FALSE)
568 }
569
570 if (!is.null(args$pathToNetSurfP2resultFile)) {
571 netsurfFiles <- list.files(path = args$pathToNetSurfP2resultFile,
572 full.names = TRUE)
573
574 SwitchList <- analyzeNetSurfP2(
575 SwitchList,
576 pathToNetSurfP2resultFile = netsurfFiles,
577 smoothingWindowSize = args$smoothingWindowSize,
578 probabilityCutoff = args$probabilityCutoff,
579 minIdrSize = args$minIdrSize,
580 showProgress = TRUE
581 )
582 }
583
584 if (!is.null(args$pathToIUPred2AresultFile)) {
585 SwitchList <- analyzeIUPred2A(
586 SwitchList,
587 pathToIUPred2AresultFile = args$pathToIUPred2AresultFile,
588 smoothingWindowSize = args$smoothingWindowSize,
589 probabilityCutoff = args$probabilityCutoff,
590 minIdrSize = args$minIdrSize,
591 annotateBindingSites = args$annotateBindingSites,
592 minIdrBindingSize = args$minIdrBindingSize,
593 minIdrBindingOverlapFrac = args$minIdrBindingOverlapFrac,
594 showProgress = TRUE,
595 quiet = FALSE
596 )
597 }
598
599 if (!is.null(args$pathToSignalPresultFile)) {
600 signalpFiles <- list.files(path = args$pathToSignalPresultFile,
601 full.names = TRUE)
602
603 SwitchList <- analyzeSignalP(
604 SwitchList,
605 pathToSignalPresultFile = signalpFiles,
606 minSignalPeptideProbability = args$minSignalPeptideProbability
607 )
608 }
609
610 SwitchList <- analyzeAlternativeSplicing(
611 SwitchList,
612 onlySwitchingGenes = args$onlySwitchingGenes,
613 alpha = args$alpha,
614 dIFcutoff = args$dIFcutoff,
615 showProgress = TRUE
616 )
617
618 SwitchList <- analyzeIntronRetention(
619 SwitchList,
620 onlySwitchingGenes = args$onlySwitchingGenes,
621 alpha = args$alpha,
622 dIFcutoff = args$dIFcutoff,
623 showProgress = TRUE
624 )
625
626 consequences <- c(
627 "intron_retention",
628 "NMD_status",
629 "isoform_seq_similarity",
630 "ORF_genomic",
631 "tss",
632 "tts"
633 )
634
635 if (!is.null(args$pathToCPATresultFile) ||
636 !is.null(args$pathToCPC2resultFile)) {
637 updatedConsequences <- c(consequences, "coding_potential")
638 consequences <- updatedConsequences
639 }
640
641 if (!is.null(args$pathToPFAMresultFile)) {
642 updatedConsequences <- c(consequences, "domains_identified")
643 consequences <- updatedConsequences
644 }
645
646 if (!is.null(args$pathToSignalPresultFile)) {
647 updatedConsequences <- c(consequences, "signal_peptide_identified")
648 consequences <- updatedConsequences
649 }
650
651 if (!is.null(args$pathToNetSurfP2resultFile) ||
652 !is.null(args$pathToIUPred2AresultFile)) {
653 updatedConsequences <- c(consequences, "IDR_identified", "IDR_type")
654 consequences <- updatedConsequences
655 }
656
657 SwitchList <- analyzeSwitchConsequences(
658 SwitchList,
659 consequencesToAnalyze = consequences,
660 alpha = args$alpha,
661 dIFcutoff = args$dIFcutoff,
662 onlySigIsoforms = args$onlySigIsoforms,
663 ntCutoff = args$ntCutoff,
664 ntJCsimCutoff = args$ntJCsimCutoff,
665 AaCutoff = args$AaCutoff,
666 AaFracCutoff = args$AaFracCutoff,
667 AaJCsimCutoff = args$AaJCsimCutoff,
668 removeNonConseqSwitches = args$removeNonConseqSwitches,
669 showProgress = TRUE
670 )
671
672
673 ### Visual analysis
674 # Top genes
675
676 if (args$analysisMode == "single") {
677
678 outputFile <- file.path(getwd(), "single_gene.pdf")
679
680 pdf(
681 file = outputFile,
682 onefile = FALSE,
683 height = 6,
684 width = 9
685 )
686
687 switchPlot(
688 SwitchList,
689 gene = args$gene,
690 condition1 = myDesign$condition[1],
691 condition2 = myDesign$condition[length(myDesign$condition)],
692 IFcutoff = args$IFcutoff,
693 dIFcutoff = args$dIFcutoff,
694 rescaleTranscripts = args$rescaleTranscripts,
695 reverseMinus = args$reverseMinus,
696 addErrorbars = args$addErrorbars,
697 logYaxis = FALSE,
698 localTheme = theme_bw(base_size = 8)
699 )
700 dev.off()
701
702 } else {
703 mostSwitchingGene <-
704 extractTopSwitches(
705 SwitchList,
706 n = Inf,
707 filterForConsequences = args$filterForConsequences,
708 extractGenes = TRUE,
709 alpha = args$alpha,
710 dIFcutoff = args$dIFcutoff,
711 inEachComparison = FALSE,
712 sortByQvals = args$sortByQvals
713 )
714
715 write.table(
716 mostSwitchingGene,
717 file = "mostSwitchingGene.tsv",
718 quote = FALSE,
719 sep = "\t",
720 col.names = TRUE,
721 row.names = FALSE
722 )
723
724
725 switchPlotTopSwitches(
726 SwitchList,
727 alpha = args$alpha,
728 dIFcutoff = args$dIFcutoff,
729 onlySigIsoforms = args$onlySigIsoforms,
730 n = args$genesToPlot,
731 sortByQvals = args$sortByQvals,
732 pathToOutput = getwd(),
733 fileType = "pdf"
734 )
735
736 outputFile <-
737 file.path(getwd(), "extractConsequencesSummary.pdf")
738
739 pdf(
740 file = outputFile,
741 onefile = FALSE,
742 height = 6,
743 width = 9
744 )
745
746 consequenceSummary <- extractConsequenceSummary(
747 SwitchList,
748 consequencesToAnalyze = "all",
749 includeCombined = FALSE,
750 asFractionTotal = args$asFractionTotal,
751 alpha = args$alpha,
752 dIFcutoff = args$dIFcutoff,
753 plot = TRUE,
754 plotGenes = args$plotGenes,
755 simplifyLocation = args$simplifyLocation,
756 removeEmptyConsequences = args$removeEmptyConsequences,
757 returnResult = TRUE,
758 localTheme = theme_bw()
759 )
760 dev.off()
761
762 write.table(
763 consequenceSummary,
764 file = "consequencesSummary.tsv",
765 quote = FALSE,
766 sep = "\t",
767 col.names = TRUE,
768 row.names = FALSE
769 )
770
771
772 outputFile <- file.path(getwd(), "consequencesEnrichment.pdf")
773 pdf(
774 file = outputFile,
775 onefile = FALSE,
776 height = 6,
777 width = 9
778 )
779 consequenceEnrichment <- extractConsequenceEnrichment(
780 SwitchList,
781 consequencesToAnalyze = "all",
782 alpha = args$alpha,
783 dIFcutoff = args$dIFcutoff,
784 countGenes = args$countGenes,
785 analysisOppositeConsequence = args$analysisOppositeConsequence,
786 plot = TRUE,
787 localTheme = theme_bw(base_size = 12),
788 minEventsForPlotting = 10,
789 returnResult = TRUE
790 )
791 dev.off()
792
793 write.table(
794 consequenceEnrichment,
795 file = "consequencesEnrichment.tsv",
796 quote = FALSE,
797 sep = "\t",
798 col.names = TRUE,
799 row.names = FALSE
800 )
801
802
803 outputFile <- file.path(getwd(), "splicingEnrichment.pdf")
804 pdf(
805 file = outputFile,
806 onefile = FALSE,
807 height = 6,
808 width = 9
809 )
810 splicingEnrichment <- extractSplicingEnrichment(
811 SwitchList,
812 splicingToAnalyze = "all",
813 alpha = args$alpha,
814 dIFcutoff = args$dIFcutoff,
815 onlySigIsoforms = args$onlySigIsoforms,
816 countGenes = args$countGenes,
817 plot = TRUE,
818 minEventsForPlotting = 10,
819 returnResult = TRUE
820 )
821 dev.off()
822
823 write.table(
824 splicingEnrichment,
825 file = "splicingEnrichment.tsv",
826 quote = FALSE,
827 sep = "\t",
828 col.names = TRUE,
829 row.names = FALSE
830 )
831
832
833 outputFile <- file.path(getwd(), "splicingSummary.pdf")
834 pdf(
835 file = outputFile,
836 onefile = FALSE,
837 height = 6,
838 width = 9
839 )
840 splicingSummary <- extractSplicingSummary(
841 SwitchList,
842 splicingToAnalyze = "all",
843 asFractionTotal = args$asFractionTotal,
844 alpha = args$alpha,
845 dIFcutoff = args$dIFcutoff,
846 onlySigIsoforms = args$onlySigIsoforms,
847 plot = TRUE,
848 plotGenes = args$plotGenes,
849 localTheme = theme_bw(),
850 returnResult = TRUE
851 )
852 dev.off()
853
854 write.table(
855 splicingSummary,
856 file = "splicingSummary.tsv",
857 quote = FALSE,
858 sep = "\t",
859 col.names = TRUE,
860 row.names = FALSE
861 )
862
863
864 ### Volcano like plot:
865 outputFile <- file.path(getwd(), "volcanoPlot.pdf")
866 pdf(
867 file = outputFile,
868 onefile = FALSE,
869 height = 6,
870 width = 9
871 )
872 ggplot(data = SwitchList$isoformFeatures, aes(x = dIF, y = -log10(isoform_switch_q_value))) +
873 geom_point(aes(color = abs(dIF) > 0.1 &
874 isoform_switch_q_value < 0.05), # default cutoff
875 size = 1) +
876 geom_hline(yintercept = -log10(0.05), linetype = "dashed") + # default cutoff
877 geom_vline(xintercept = c(-0.1, 0.1), linetype = "dashed") + # default cutoff
878 facet_wrap(~ condition_2) +
879 scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) +
880 labs(x = "dIF", y = "-Log10 ( Isoform Switch Q Value )") +
881 theme_bw()
882 dev.off()
883
884
885 ### Switch vs Gene changes:
886 outputFile <- file.path(getwd(), "switchGene.pdf")
887 pdf(
888 file = outputFile,
889 onefile = FALSE,
890 height = 6,
891 width = 9
892 )
893 ggplot(data = SwitchList$isoformFeatures,
894 aes(x = gene_log2_fold_change, y = dIF)) +
895 geom_point(aes(color = abs(dIF) > 0.1 &
896 isoform_switch_q_value < 0.05),
897 size = 1) +
898 facet_wrap(~ condition_2) +
899 geom_hline(yintercept = 0, linetype = "dashed") +
900 geom_vline(xintercept = 0, linetype = "dashed") +
901 scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) +
902 labs(x = "Gene log2 fold change", y = "dIF") +
903 theme_bw()
904 dev.off()
905
906 outputFile <- file.path(getwd(), "splicingGenomewide.pdf")
907 pdf(
908 file = outputFile,
909 onefile = FALSE,
910 height = 6,
911 width = 9
912 )
913 splicingGenomeWide <- extractSplicingGenomeWide(
914 SwitchList,
915 featureToExtract = "all",
916 splicingToAnalyze = c("A3", "MES", "ATSS"),
917 plot = TRUE,
918 returnResult = TRUE
919 )
920 dev.off()
921 }
922 save(SwitchList, file = "SwitchList.Rda")
923
924 }