Mercurial > repos > nturaga > minfi_pipeline
comparison minfi_TCGA_pipeline.R @ 0:84361ce36a11 draft
planemo upload commit fb90aafc93e5e63acfcdac4c27cfd865cdf06c5a-dirty
author | nturaga |
---|---|
date | Tue, 19 Apr 2016 11:10:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:84361ce36a11 |
---|---|
1 # setup R error handling to go to stderr | |
2 options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)}) | |
3 | |
4 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
5 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
6 | |
7 library("getopt") | |
8 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | |
9 args <- commandArgs(trailingOnly = TRUE) | |
10 | |
11 # get options, using the spec as defined by the enclosed list. | |
12 # we read the options from the default: commandArgs(TRUE). | |
13 spec <- matrix(c( | |
14 'quiet', 'q', 2, "logical", | |
15 'help' , 'h', 0, "logical", | |
16 'tarfile','f',1,"character", | |
17 "cores","c",1,"integer", | |
18 "b_permutations","b",2,"integer", | |
19 "smooth","m",2,"logical", | |
20 "l_value","l",2,"integer") | |
21 ,byrow=TRUE, ncol=4) | |
22 opt <- getopt(spec) | |
23 | |
24 ## If help was asked for print a friendly message | |
25 ## and exit with a non-zero error code | |
26 if (!is.null(opt$help)) { | |
27 cat(getopt(spec, usage=TRUE)) | |
28 q(status=1) | |
29 } | |
30 | |
31 | |
32 ## Set verbose mode | |
33 verbose = if(is.null(opt$quiet)){TRUE}else{FALSE} | |
34 if(verbose){ | |
35 cat("Verbose mode is ON\n\n") | |
36 } | |
37 | |
38 ## Load required libraries | |
39 suppressPackageStartupMessages({ | |
40 library("minfi") | |
41 library("FlowSorted.Blood.450k") | |
42 library("TxDb.Hsapiens.UCSC.hg19.knownGene") | |
43 library("doParallel") | |
44 library("tools") | |
45 }) | |
46 | |
47 | |
48 config_file = "tcga_temp/config.txt" | |
49 conf = read.csv(config_file,stringsAsFactors=FALSE,header=F) | |
50 tarfile_name = gsub(".+/","",conf$V2) | |
51 dataset_path = conf$V1 | |
52 | |
53 cmd = paste("ln -s",dataset_path,tarfile_name,sep=" ") | |
54 cat("Command : ", cmd,"\n") | |
55 system(cmd) | |
56 | |
57 tarfile = tarfile_name | |
58 cat ("tarfile name: ",tarfile," file ext: ",file_ext(tarfile)) | |
59 ## UNtar files in R first | |
60 if (file_ext(tarfile) == "tar"){ | |
61 cat("Entering IF statment") | |
62 tar_contents = untar(tarfile,list=TRUE) | |
63 cat("regex failing here") | |
64 f = as.character(tar_contents[grep(".data.txt",fixed=TRUE,x=tar_contents)]) | |
65 if (!is.null(f)){ | |
66 cat("Untar being attempted") | |
67 untar(tarfile, exdir = ".",files=f) | |
68 cat("Untar succcess") | |
69 } | |
70 } | |
71 ## Move file from sub directory to main directory | |
72 from = list.files(pattern=".data.txt",recursive=TRUE) | |
73 to = gsub(".+/","",from) | |
74 rename_success = file.rename(from=from, to=to) | |
75 | |
76 | |
77 # This should pass only if steps have been successful | |
78 stopifnot(rename_success) | |
79 if (rename_success){ | |
80 input_file = to | |
81 } | |
82 | |
83 ### Read the TCGA data | |
84 GRset = readTCGA(input_file, sep = "\t", keyName = "Composite Element REF", Betaname = "Beta_value", pData = NULL, array = "IlluminaHumanMethylation450k",showProgress=TRUE) | |
85 | |
86 ### Get beta values | |
87 beta = getBeta(GRset) | |
88 pd = pData(GRset) | |
89 CN = getCN(GRset) | |
90 chr = seqnames(GRset) | |
91 pos = start(GRset) | |
92 chr = as.character(chr) | |
93 | |
94 | |
95 # Assign phenotype information | |
96 ## Based on TCGA sample naming, TCGA-2E-A9G8-01A-11D-A409-05, char 14,15 represent | |
97 ## phenotypic status of sample, 01 = cancer, 11=normal | |
98 pd$status = ifelse(test= (substr(rownames(pd),14,15) == "01"),yes="cancer",no="normal") | |
99 | |
100 ### DMP finder | |
101 dmp = dmpFinder(dat=beta,pheno=pd$status,type="categorical") | |
102 write.csv(dmp,file="dmps.csv",quote=FALSE,row.names=TRUE) | |
103 if(verbose){ | |
104 cat("DMP Finder successful \n") | |
105 } | |
106 | |
107 | |
108 ## Make design matrix for bumphunting | |
109 #Model Matrix | |
110 T1="normal";T2="cancer" | |
111 ## Introduce error if levels are different | |
112 stopifnot(T1!=T2) | |
113 keep=pd$status%in%c(T1,T2) | |
114 tt=factor(pd$status[keep],c(T1,T2)) | |
115 design=model.matrix(~tt) | |
116 | |
117 | |
118 ## Start bumphunter in a parallel environment | |
119 ## Parallelize over cores on machine | |
120 library(doParallel) | |
121 registerDoParallel(cores = opt$cores) | |
122 | |
123 # Bumphunter Run with object processed with default Quantile Normalization | |
124 # provided along with TCGA data | |
125 dmrs = bumphunter(beta[,keep],chr=chr,pos=pos,design=design,B=opt$b_permutations,smooth=opt$smooth,pickCutoff =TRUE) | |
126 bumps = dmrs$tab | |
127 | |
128 if(verbose){ | |
129 cat("Bumphunter result", "\n") | |
130 head(bumps) | |
131 } | |
132 | |
133 | |
134 ### Choose DMR's of a certain length threshold. | |
135 ### This helps reduce the size of DMRs early, and match | |
136 ### with genes closest to region | |
137 bumps = bumps[bumps$L > opt$l_value,] | |
138 genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
139 tab=matchGenes(bumps,genes) | |
140 annotated_dmrs=cbind(bumps,tab) | |
141 | |
142 if(verbose){ | |
143 cat("Match with annotation\n") | |
144 head(annotated_dmrs) | |
145 } | |
146 | |
147 ## Save result, which contains DMR's and closest genes | |
148 write.csv(annotated_dmrs,file = "dmrs.csv",quote=FALSE,row.names=FALSE) | |
149 | |
150 ## Garbage collect | |
151 gc() | |
152 | |
153 |