annotate normalize.r @ 1:8389b0c211ae draft

Uploaded
author ynewton
date Thu, 13 Dec 2012 11:19:57 -0500
parents
children f3fe0f64fe91
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
1 #!/usr/bin/Rscript
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
2
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
3 #usage, options and doc goes here
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
4 argspec <- c("normalize.r - takes any flat file and normalizes the rows or the columns using various normalizations (median_shift, mean_shift, t_statistic (z-score), exp_fit, normal_fit, weibull_0.5_fit, weibull_1_fit, weibull_1.5_fit, weibull_5_fit). Requires a single header line and a single cloumn of annotation.
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
5 Usage:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
6 normalize.r input.tab norm_type norm_by > output.tab
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
7 Example:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
8 Rscript normalize.r test_matrix.tab median_shift column > output.tab
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
9 Rscript normalize.r test_matrix.tab mean_shift row normals.tab > output.tab
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
10 Options:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
11 input matrix (annotated by row and column names)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
12 normalization type; available options:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
13 median_shift - shifts all values by the median or the row/column if no normals are specified, otherwise shifts by the median of normals
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
14 mean_shift - shifts all values by the mean or the row/column if no normals are specified, otherwise shifts by the mean of normals
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
15 t_statistic - converts all values to z-scores; if normals are specified then converts to z-scores within normal and non-normal classes separately
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
16 exponential_fit - (only by column) ranks data and transforms exponential CDF
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
17 normal_fit - (only by column) ranks data and transforms normal CDF
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
18 weibull_0.5_fit - (only by column) ranks data and transforms Weibull CDF with scale parameter = 1 and shape parameter = 0.5
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
19 weibull_1_fit - (only by column) ranks data and transforms Weibull CDF with scale parameter = 1 and shape parameter = 1
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
20 weibull_1.5_fit - (only by column) ranks data and transforms Weibull CDF with scale parameter = 1 and shape parameter = 1.5
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
21 weibull_5_fit - (only by column) ranks data and transforms Weibull CDF with scale parameter = 1 and shape parameter = 5
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
22 normalization by:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
23 row
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
24 column
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
25 normals_file is an optional parameter which contains either a list of column headers from the input matrix, which should be considered as normals, or a matrix of normal samples
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
26 output file is specified through redirect character >")
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
27
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
28 read_matrix <- function(in_file){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
29 header <- strsplit(readLines(con=in_file, n=1), "\t")[[1]]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
30 cl.cols<- 1:length(header) > 1
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
31 data_matrix.df <- read.delim(in_file, header=TRUE, row.names=NULL, stringsAsFactors=FALSE, na.strings="NA", check.names=FALSE)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
32 data_matrix <- as.matrix(data_matrix.df[,cl.cols])
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
33 rownames(data_matrix) <- data_matrix.df[,1]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
34 return(data_matrix)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
35
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
36 #read_mtrx <- as.matrix(read.table(in_file, header=TRUE, sep="", row.names=NULL, stringsAsFactors=FALSE, na.strings="NA")) #separate on white characters
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
37 #read_mtrx[,1]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
38
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
39 #return(as.matrix(read.table(in_file, header=TRUE, sep="", row.names=1))) #separate on white characters
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
40 #mtrx <- read.delim(in_file, header=TRUE, sep="", row.names=NULL, stringsAsFactors=FALSE, na.strings="NA")
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
41 #print(mtrx[1,])
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
42 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
43
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
44 write_matrix <- function(data_matrix){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
45 header <- append(c("Genes"), colnames(data_matrix))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
46 write.table(t(header), stdout(), quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
47 write.table(data_matrix, stdout(), quote=FALSE, sep="\t", row.names=TRUE, col.names=FALSE)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
48 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
49
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
50 read_normals <- function(in_file){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
51 #return(as.matrix(read.table(in_file, header=FALSE, sep="", as.is = TRUE))[, 1])
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
52 return(as.matrix(read.table(in_file, header=FALSE, sep="", as.is = TRUE)))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
53 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
54
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
55 normalize <- function(data_matrix, norm_type, normals_list, tumors_list){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
56 if(norm_type == 'MEDIAN_SHIFT'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
57 return(shift(data_matrix, 'MEDIAN', normals_list, tumors_list))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
58 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
59 else if(norm_type == 'MEAN_SHIFT'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
60 return(shift(data_matrix, 'MEAN', normals_list, tumors_list))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
61 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
62 else if(norm_type == 'T_STATISTIC'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
63 return(compute_z_score(data_matrix, normals_list, tumors_list))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
64 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
65 else if(norm_type == 'EXPONENTIAL_FIT'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
66 return(fit_distribution(data_matrix, 'EXPONENTIAL'))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
67 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
68 else if(norm_type == 'NORMAL_FIT'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
69 return(fit_distribution(data_matrix, 'NORMAL'))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
70 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
71 else if(norm_type == 'WEIBULL_0.5_FIT'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
72 return(fit_distribution(data_matrix, 'WEIBULL_0.5'))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
73 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
74 else if(norm_type == 'WEIBULL_1_FIT'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
75 return(fit_distribution(data_matrix, 'WEIBULL_1'))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
76 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
77 else if(norm_type == 'WEIBULL_1.5_FIT'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
78 return(fit_distribution(data_matrix, 'WEIBULL_1.5'))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
79 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
80 else if(norm_type == 'WEIBULL_5_FIT'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
81 return(fit_distribution(data_matrix, 'WEIBULL_5'))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
82 }else{
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
83 write("ERROR: unknown normalization type", stderr());
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
84 q();
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
85 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
86 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
87
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
88 shift <- function(data_matrix, shift_type, normals_list, tumors_list){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
89 return(t(apply(data_matrix, 1, shift_normalize_row, norm_type=shift_type, normals_list=normals_list, tumors_list=tumors_list)))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
90 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
91
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
92 shift_normalize_row <- function(data_row, norm_type, normals_list, tumors_list){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
93 if(length(normals_list) == 0){ #no normals are specified
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
94 if(norm_type == 'MEDIAN'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
95 row_stat <- median(data_row)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
96 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
97 else if(norm_type == 'MEAN'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
98 row_stat <- mean(data_row)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
99 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
100 return(unlist(lapply(data_row, function(x){return(x - row_stat);})))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
101 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
102 else{ #normals are specified
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
103 normal_values <- data_row[normals_list]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
104 tumor_columns <- data_row[tumors_list]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
105
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
106 if(norm_type == 'MEDIAN'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
107 row_stat <- median(normal_values)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
108 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
109 else if(norm_type == 'MEAN'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
110 row_stat <- mean(normal_values)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
111 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
112 return(unlist(lapply(tumor_columns, function(x){return(x - row_stat);})))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
113 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
114 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
115
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
116 compute_z_score <- function(data_matrix, normals_list, tumors_list){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
117 return(t(apply(data_matrix, 1, t_stat_normalize_row, normals_list=normals_list, tumors_list=tumors_list)))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
118 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
119
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
120 t_stat_normalize_row <- function(data_row, normals_list, tumors_list){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
121 if(length(normals_list) == 0){ #no normals are specified
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
122 row_mean <- mean(data_row)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
123 row_sd <- sd(data_row)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
124 return(unlist(lapply(data_row, function(x){return((x - row_mean)/row_sd);})))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
125 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
126 else{ #normals are specified
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
127 normal_values <- data_row[normals_list]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
128 normal_mean <- mean(normal_values)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
129 normal_sd <- sd(normal_values)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
130 normalized_normals <- unlist(lapply(normal_values, function(x){return((x - normal_mean)/normal_sd);}))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
131
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
132 tumor_values <- data_row[tumors_list]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
133 normalized_tumors <- unlist(lapply(tumor_values, function(x){return((x - normal_mean)/normal_sd);}))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
134
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
135 return(append(normalized_normals, normalized_tumors))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
136 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
137 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
138
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
139 rankNA <- function(col){ #originally written by Dan Carlin
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
140 col[!is.na(col)]<-(rank(col[!is.na(col)])/sum(!is.na(col)))-(1/sum(!is.na(col)))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
141 return(col)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
142 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
143
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
144 fit_distribution <- function(data_matrix, dist){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
145 if(dist == 'EXPONENTIAL'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
146 ranked_data_matrix <- apply(data_matrix,1,rankNA) #idea by Dan Carlin
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
147 #write.table(c("ranked data:"), stdout(), quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
148 #write.table(ranked_data_matrix, stdout(), quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
149 return(apply(ranked_data_matrix, 1, qexp))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
150 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
151 else if(dist == 'NORMAL'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
152 ranked_data_matrix <- apply(data_matrix,2,rankNA)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
153 return(apply(ranked_data_matrix, c(1,2), qnorm, mean=0, sd=1))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
154 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
155 else if(dist == 'WEIBULL_0.5'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
156 ranked_data_matrix <- apply(data_matrix,2,rankNA)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
157 return(apply(ranked_data_matrix, c(1,2), qweibull, scale=1, shape=0.5))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
158 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
159 else if(dist == 'WEIBULL_1'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
160 ranked_data_matrix <- apply(data_matrix,2,rankNA)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
161 return(apply(ranked_data_matrix, c(1,2), qweibull, scale=1, shape=1))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
162 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
163 else if(dist == 'WEIBULL_1.5'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
164 ranked_data_matrix <- apply(data_matrix,2,rankNA)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
165 return(apply(ranked_data_matrix, c(1,2), qweibull, scale=1, shape=1.5))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
166 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
167 else if(dist == 'WEIBULL_5'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
168 ranked_data_matrix <- apply(data_matrix,2,rankNA)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
169 return(apply(ranked_data_matrix, c(1,2), qweibull, scale=1, shape=5))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
170 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
171 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
172
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
173 main <- function(argv) {
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
174 #determine if correct number of arguments are specified and if normals are specified
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
175 with_normals = FALSE
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
176
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
177 if(length(argv) == 1){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
178 if(argv==c('--help')){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
179 write(argspec, stderr());
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
180 q();
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
181 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
182 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
183
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
184 if(!(length(argv) == 3 || length(argv) == 4)){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
185 write("ERROR: invalid number of arguments is specified", stderr());
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
186 q();
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
187 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
188
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
189 if(length(argv) == 4){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
190 with_normals = TRUE
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
191 normals_file <- argv[4]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
192 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
193
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
194 #store command line arguments in variables:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
195 input_file <- argv[1]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
196 norm_type <- toupper(argv[2])
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
197 norm_by <- toupper(argv[3])
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
198
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
199 #input_file <- "/Users/ynewton/school/ucsc/projects/stuart_lab/data_normalization/test_matrix.tab"
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
200 #norm_type <- "MEAN_SHIFT"
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
201 #norm_by <- "ROW"
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
202 #normals_file <- "/Users/ynewton/school/ucsc/projects/stuart_lab/data_normalization/test_matrix2.tab"
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
203 #normals_file2 <- "/Users/ynewton/school/ucsc/projects/stuart_lab/data_normalization/normals.tab"
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
204
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
205 #read the input file(s):
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
206 data_matrix <- read_matrix(input_file)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
207
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
208 if(with_normals){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
209 normals <- read_normals(normals_file)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
210 if(length(colnames(normals)) == 1){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
211 normals_indices <- which(colnames(data_matrix) %in% normals)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
212 tumor_indices <- which(!(colnames(data_matrix) %in% normals))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
213 }else{
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
214 normals_numeric <- normals[2:length(normals[,1]),2:length(normals[1,])]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
215 normals_numeric <- apply(normals_numeric, 2, as.numeric)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
216 rownames(normals_numeric) <- normals[,1][2:length(normals[,1])]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
217 colnames(normals_numeric) <- normals[1,][2:length(normals[1,])]
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
218
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
219 combined_matrix <- cbind(data_matrix, normals_numeric)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
220 tumor_indices <- c(1:length(data_matrix[1,]))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
221 normals_indices <- c(length(tumor_indices)+1:length(normals_numeric[1,]))
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
222 data_matrix <- combined_matrix
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
223 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
224 }else{
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
225 normals_indices <- c()
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
226 tumor_indices <- c()
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
227 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
228
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
229 #if normalize by columns then transpose the matrix:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
230 if(norm_by == 'COLUMN'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
231 data_matrix <- t(data_matrix)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
232 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
233
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
234 #normalize:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
235 data_matrix <- normalize(data_matrix, norm_type, normals_indices, tumor_indices)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
236
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
237 #if normalize by columns then transpose the matrix again since we normalized the transposed matrix by row:
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
238 if(norm_by == 'COLUMN'){
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
239 data_matrix <- t(data_matrix)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
240 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
241
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
242 write_matrix(data_matrix)
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
243 }
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
244
8389b0c211ae Uploaded
ynewton
parents:
diff changeset
245 main(commandArgs(TRUE))