Mercurial > repos > davidvanzessen > prisca
comparison RScript.r @ 2:7ffd0fba8cf4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 18 Sep 2017 09:01:19 -0400 |
parents | 75853bceec00 |
children | 20f0df3721aa |
comparison
equal
deleted
inserted
replaced
1:75853bceec00 | 2:7ffd0fba8cf4 |
---|---|
16 library(grid) | 16 library(grid) |
17 library(parallel) | 17 library(parallel) |
18 #require(xtable) | 18 #require(xtable) |
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T) | 19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T) |
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F) | 20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F) |
21 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")] | 21 |
22 needed_cols = c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence") | |
23 if(!all(needed_cols %in% names(dat))){ | |
24 cat("Missing column(s):<br />", file=logfile, append=F) | |
25 missing_cols = needed_cols[!(needed_cols %in% names(dat))] | |
26 for(col in missing_cols){ | |
27 cat(paste(col, "<br />"), file=logfile, append=T) | |
28 } | |
29 stop("Not all columns are present") | |
30 } | |
31 | |
32 if(!("Total_Read_Count" %in% names(dat))){ | |
33 dat$Total_Read_Count = 0 | |
34 } | |
35 | |
36 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")] | |
37 | |
22 dat$dsPerM = 0 | 38 dat$dsPerM = 0 |
23 dat = dat[!is.na(dat$Patient),] | 39 dat = dat[!is.na(dat$Patient),] |
24 dat$Related_to_leukemia_clone = F | 40 dat$Related_to_leukemia_clone = F |
25 | 41 |
26 setwd(outDir) | 42 setwd(outDir) |
74 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") | 90 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") |
75 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb") | 91 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb") |
76 Titles = factor(Titles, levels=Titles) | 92 Titles = factor(Titles, levels=Titles) |
77 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) | 93 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) |
78 | 94 |
79 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0)) | 95 single_patients = dat[NULL,] |
80 | 96 |
81 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... | 97 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... |
82 patient.merge.list.second = list() | 98 patient.merge.list.second = list() |
83 scatter_locus_data_list = list() | 99 scatter_locus_data_list = list() |
84 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T) | 100 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T) |
94 onShort = "freq" | 110 onShort = "freq" |
95 } | 111 } |
96 onx = paste(on, ".x", sep="") | 112 onx = paste(on, ".x", sep="") |
97 ony = paste(on, ".y", sep="") | 113 ony = paste(on, ".y", sep="") |
98 splt = split(x, x$Sample, drop=T) | 114 splt = split(x, x$Sample, drop=T) |
115 print(splt) | |
99 type="pair" | 116 type="pair" |
100 if(length(splt) == 1){ | 117 if(length(splt) == 1){ |
101 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) | 118 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) |
102 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0)) | 119 splt[[2]] = splt[[1]][NULL,] |
103 type="single" | 120 type="single" |
104 } | 121 } |
105 patient1 = splt[[1]] | 122 patient1 = splt[[1]] |
106 patient2 = splt[[2]] | 123 patient2 = splt[[2]] |
107 | 124 |
108 threshholdIndex = which(colnames(product) == "interval") | 125 oneSample = patient1[1,"Sample"] |
109 V_SegmentIndex = which(colnames(product) == "V_Segments") | 126 twoSample = patient2[1,"Sample"] |
110 J_SegmentIndex = which(colnames(product) == "J_Segments") | 127 patient = x[1,"Patient"] |
111 titleIndex = which(colnames(product) == "Titles") | 128 |
112 sampleIndex = which(colnames(x) == "Sample") | |
113 patientIndex = which(colnames(x) == "Patient") | |
114 oneSample = paste(patient1[1,sampleIndex], sep="") | |
115 twoSample = paste(patient2[1,sampleIndex], sep="") | |
116 patient = paste(x[1,patientIndex]) | |
117 | |
118 switched = F | 129 switched = F |
119 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){ | 130 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){ |
120 tmp = twoSample | 131 tmp = twoSample |
121 twoSample = oneSample | 132 twoSample = oneSample |
122 oneSample = tmp | 133 oneSample = tmp |
137 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 148 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) |
138 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 149 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) |
139 } | 150 } |
140 | 151 |
141 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") | 152 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") |
142 #scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns]) | |
143 scatterplot_data = patient1[NULL,scatterplot_data_columns] | 153 scatterplot_data = patient1[NULL,scatterplot_data_columns] |
144 #scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] | |
145 #scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both")) | |
146 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")) | 154 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")) |
147 #scatterplot_data$type = factor(x=NULL, levels=scatterplot.data.type.factor) | |
148 scatterplot_data$type = character(0) | 155 scatterplot_data$type = character(0) |
149 scatterplot_data$link = numeric(0) | 156 scatterplot_data$link = numeric(0) |
150 scatterplot_data$on = character(0) | 157 scatterplot_data$on = character(0) |
151 | 158 |
152 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy' | |
153 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh | 159 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh |
154 | 160 |
155 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence | 161 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence |
156 | 162 |
157 start.time = proc.time() | 163 start.time = proc.time() |
158 merge.list = c() | 164 merge.list = c() |
159 | 165 |
160 if(patient %in% names(patient.merge.list)){ | 166 if(patient %in% names(patient.merge.list)){ |
161 patientMerge = patient.merge.list[[patient]] | 167 patientMerge = patient.merge.list[[patient]] |
162 merge.list[["second"]] = patient.merge.list.second[[patient]] | 168 merge.list[["second"]] = patient.merge.list.second[[patient]] |
163 scatterplot_data = scatter_locus_data_list[[patient]] | 169 scatterplot_data = scatter_locus_data_list[[patient]] |
164 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) | 170 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) |
165 | 171 |
166 print(names(patient.merge.list)) | 172 print(names(patient.merge.list)) |
167 } else { | 173 } else { |
168 #fuzzy matching here... | 174 #fuzzy matching here... |
169 #merge.list = patientMerge$merge | 175 |
170 | |
171 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),] | |
172 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] | |
173 | |
174 patient1.fuzzy = patient1 | 176 patient1.fuzzy = patient1 |
175 patient2.fuzzy = patient2 | 177 patient2.fuzzy = patient2 |
176 | 178 |
177 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) | |
178 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) | |
179 | |
180 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) | |
181 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) | |
182 | |
183 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) | 179 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) |
184 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) | 180 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) |
185 | 181 |
186 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove? | |
187 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] | |
188 | |
189 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | |
190 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | |
191 | |
192 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) | 182 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) |
193 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] | 183 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] |
194 | 184 |
195 merge.list = list() | 185 merge.list = list() |
196 | 186 |
197 merge.list[["second"]] = vector() | 187 merge.list[["second"]] = vector() |
198 | 188 |
199 link.count = 1 | 189 link.count = 1 |
200 | 190 |
201 while(nrow(patient.fuzzy) > 1){ | 191 while(nrow(patient.fuzzy) > 1){ |
202 first.merge = patient.fuzzy[1,"merge"] | 192 first.merge = patient.fuzzy[1,"merge"] |
203 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] | 193 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] |
204 first.sample = patient.fuzzy[1,"Sample"] | 194 first.sample = patient.fuzzy[1,"Sample"] |
205 merge.filter = first.merge == patient.fuzzy$merge | 195 merge.filter = first.merge == patient.fuzzy$merge |
206 | 196 |
207 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 | 197 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 |
208 | 198 |
209 first.sample.filter = first.sample == patient.fuzzy$Sample | 199 first.sample.filter = first.sample == patient.fuzzy$Sample |
210 second.sample.filter = first.sample != patient.fuzzy$Sample | 200 second.sample.filter = first.sample != patient.fuzzy$Sample |
211 | 201 |
212 #first match same sample, sum to a single row, same for other sample | 202 #first match same sample, sum to a single row, same for other sample |
213 #then merge rows like 'normal' | 203 #then merge rows like 'normal' |
214 | 204 |
215 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) | 205 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) |
216 | 206 |
217 | 207 |
218 | 208 |
219 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter | 209 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter |
220 first.match.filter = merge.filter & sequence.filter & first.sample.filter | 210 first.match.filter = merge.filter & sequence.filter & first.sample.filter |
221 second.match.filter = merge.filter & sequence.filter & second.sample.filter | 211 second.match.filter = merge.filter & sequence.filter & second.sample.filter |
222 | 212 |
223 first.rows = patient.fuzzy[first.match.filter,] | 213 first.rows = patient.fuzzy[first.match.filter,] |
224 second.rows = patient.fuzzy[second.match.filter,] | 214 second.rows = patient.fuzzy[second.match.filter,] |
225 | 215 |
226 first.rows.v = table(first.rows$V_Segment_Major_Gene) | 216 first.rows.v = table(first.rows$V_Segment_Major_Gene) |
227 first.rows.v = names(first.rows.v[which.max(first.rows.v)]) | 217 first.rows.v = names(first.rows.v[which.max(first.rows.v)]) |
228 first.rows.j = table(first.rows$J_Segment_Major_Gene) | 218 first.rows.j = table(first.rows$J_Segment_Major_Gene) |
229 first.rows.j = names(first.rows.j[which.max(first.rows.j)]) | 219 first.rows.j = names(first.rows.j[which.max(first.rows.j)]) |
230 | 220 |
231 first.sum = data.frame(merge = first.clone.sequence, | 221 first.sum = data.frame(merge = first.clone.sequence, |
232 Patient = patient, | 222 Patient = patient, |
233 Receptor = first.rows[1,"Receptor"], | 223 Receptor = first.rows[1,"Receptor"], |
234 Sample = first.rows[1,"Sample"], | 224 Sample = first.rows[1,"Sample"], |
235 Cell_Count = first.rows[1,"Cell_Count"], | 225 Cell_Count = first.rows[1,"Cell_Count"], |
247 locus_J = first.rows[1,"locus_J"], | 237 locus_J = first.rows[1,"locus_J"], |
248 min_cell_count = first.rows[1,"min_cell_count"], | 238 min_cell_count = first.rows[1,"min_cell_count"], |
249 normalized_read_count = sum(first.rows$normalized_read_count), | 239 normalized_read_count = sum(first.rows$normalized_read_count), |
250 paste = first.rows[1,"paste"], | 240 paste = first.rows[1,"paste"], |
251 min_cell_paste = first.rows[1,"min_cell_paste"]) | 241 min_cell_paste = first.rows[1,"min_cell_paste"]) |
252 | 242 |
253 if(nrow(second.rows) > 0){ | 243 if(nrow(second.rows) > 0){ |
254 second.rows.v = table(second.rows$V_Segment_Major_Gene) | 244 second.rows.v = table(second.rows$V_Segment_Major_Gene) |
255 second.rows.v = names(second.rows.v[which.max(second.rows.v)]) | 245 second.rows.v = names(second.rows.v[which.max(second.rows.v)]) |
256 second.rows.j = table(second.rows$J_Segment_Major_Gene) | 246 second.rows.j = table(second.rows$J_Segment_Major_Gene) |
257 second.rows.j = names(second.rows.j[which.max(second.rows.j)]) | 247 second.rows.j = names(second.rows.j[which.max(second.rows.j)]) |
258 | 248 |
259 second.sum = data.frame(merge = first.clone.sequence, | 249 second.sum = data.frame(merge = first.clone.sequence, |
260 Patient = patient, | 250 Patient = patient, |
261 Receptor = second.rows[1,"Receptor"], | 251 Receptor = second.rows[1,"Receptor"], |
262 Sample = second.rows[1,"Sample"], | 252 Sample = second.rows[1,"Sample"], |
263 Cell_Count = second.rows[1,"Cell_Count"], | 253 Cell_Count = second.rows[1,"Cell_Count"], |
264 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes), | 254 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes), |
265 Log10_Frequency = log10(sum(second.rows$Frequency)), | 255 Log10_Frequency = log10(sum(second.rows$Frequency)), |
266 Total_Read_Count = sum(second.rows$Total_Read_Count), | 256 Total_Read_Count = sum(second.rows$Total_Read_Count), |
267 dsPerM = sum(second.rows$dsPerM), | 257 dsPerM = sum(second.rows$dsPerM), |
268 J_Segment_Major_Gene = second.rows.j, | 258 J_Segment_Major_Gene = second.rows.j, |
269 V_Segment_Major_Gene = second.rows.v, | 259 V_Segment_Major_Gene = second.rows.v, |
270 Clone_Sequence = first.clone.sequence, | 260 Clone_Sequence = first.clone.sequence, |
271 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"], | 261 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"], |
272 Related_to_leukemia_clone = F, | 262 Related_to_leukemia_clone = F, |
273 Frequency = sum(second.rows$Frequency), | 263 Frequency = sum(second.rows$Frequency), |
274 locus_V = second.rows[1,"locus_V"], | 264 locus_V = second.rows[1,"locus_V"], |
275 locus_J = second.rows[1,"locus_J"], | 265 locus_J = second.rows[1,"locus_J"], |
276 min_cell_count = second.rows[1,"min_cell_count"], | 266 min_cell_count = second.rows[1,"min_cell_count"], |
277 normalized_read_count = sum(second.rows$normalized_read_count), | 267 normalized_read_count = sum(second.rows$normalized_read_count), |
278 paste = second.rows[1,"paste"], | 268 paste = second.rows[1,"paste"], |
279 min_cell_paste = second.rows[1,"min_cell_paste"]) | 269 min_cell_paste = second.rows[1,"min_cell_paste"]) |
280 | 270 |
271 #print(names(patientMerge)) | |
272 #print(merge(first.sum, second.sum, by="merge")) | |
281 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge")) | 273 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge")) |
274 #print("test2") | |
282 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),] | 275 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),] |
283 | 276 |
284 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) | 277 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) |
285 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | 278 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) |
286 | 279 |
287 tmp.rows = rbind(first.rows, second.rows) | 280 tmp.rows = rbind(first.rows, second.rows) |
281 #print("test3") | |
288 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),] | 282 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),] |
289 | 283 |
290 | 284 |
291 #add to the scatterplot data | 285 #add to the scatterplot data |
292 scatterplot.row = first.sum[,scatterplot_data_columns] | 286 scatterplot.row = first.sum[,scatterplot_data_columns] |
293 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both") | 287 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both") |
294 scatterplot.row$link = link.count | 288 scatterplot.row$link = link.count |
295 scatterplot.row$on = onShort | 289 scatterplot.row$on = onShort |
296 | 290 |
297 scatterplot_data = rbind(scatterplot_data, scatterplot.row) | 291 scatterplot_data = rbind(scatterplot_data, scatterplot.row) |
298 | 292 |
299 scatterplot.row = second.sum[,scatterplot_data_columns] | 293 scatterplot.row = second.sum[,scatterplot_data_columns] |
300 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both") | 294 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both") |
301 scatterplot.row$link = link.count | 295 scatterplot.row$link = link.count |
302 scatterplot.row$on = onShort | 296 scatterplot.row$on = onShort |
303 | 297 |
304 scatterplot_data = rbind(scatterplot_data, scatterplot.row) | 298 scatterplot_data = rbind(scatterplot_data, scatterplot.row) |
305 | 299 |
306 #write some information about the match to a log file | 300 #write some information about the match to a log file |
307 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) { | 301 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) { |
308 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T) | 302 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T) |
309 } else { | 303 } else { |
310 second.clone.sequence = second.rows[1,"Clone_Sequence"] | 304 second.clone.sequence = second.rows[1,"Clone_Sequence"] |
311 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){ | 305 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){ |
312 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T) | 306 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T) |
313 } else { | 307 } else { |
314 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T) | 308 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T) |
315 } | 309 } |
316 } | 310 } |
317 | 311 |
318 } else if(nrow(first.rows) > 1) { | 312 } else if(nrow(first.rows) > 1) { |
319 if(patient1[1,"Sample"] == first.sample){ | 313 if(patient1[1,"Sample"] == first.sample){ |
320 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),] | 314 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),] |
321 patient1 = rbind(patient1, first.sum) | 315 patient1 = rbind(patient1, first.sum) |
322 } else { | 316 } else { |
323 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),] | 317 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),] |
324 patient2 = rbind(patient2, first.sum) | 318 patient2 = rbind(patient2, first.sum) |
325 } | 319 } |
326 | 320 |
327 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"]) | 321 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"]) |
328 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | 322 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) |
329 | 323 |
330 patient.fuzzy = patient.fuzzy[-first.match.filter,] | 324 patient.fuzzy = patient.fuzzy[-first.match.filter,] |
331 | 325 |
332 #add to the scatterplot data | 326 #add to the scatterplot data |
333 scatterplot.row = first.sum[,scatterplot_data_columns] | 327 scatterplot.row = first.sum[,scatterplot_data_columns] |
334 scatterplot.row$type = first.sum[,"Sample"] | 328 scatterplot.row$type = first.sum[,"Sample"] |
335 scatterplot.row$link = link.count | 329 scatterplot.row$link = link.count |
336 scatterplot.row$on = onShort | 330 scatterplot.row$on = onShort |
337 | 331 |
338 scatterplot_data = rbind(scatterplot_data, scatterplot.row) | 332 scatterplot_data = rbind(scatterplot_data, scatterplot.row) |
339 | 333 |
340 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T) | 334 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T) |
341 } else { | 335 } else { |
342 patient.fuzzy = patient.fuzzy[-1,] | 336 patient.fuzzy = patient.fuzzy[-1,] |
343 | 337 |
344 #add to the scatterplot data | 338 #add to the scatterplot data |
345 scatterplot.row = first.sum[,scatterplot_data_columns] | 339 scatterplot.row = first.sum[,scatterplot_data_columns] |
346 scatterplot.row$type = first.sum[,"Sample"] | 340 scatterplot.row$type = first.sum[,"Sample"] |
347 scatterplot.row$link = link.count | 341 scatterplot.row$link = link.count |
348 scatterplot.row$on = onShort | 342 scatterplot.row$on = onShort |
349 | 343 |
350 scatterplot_data = rbind(scatterplot_data, scatterplot.row) | 344 scatterplot_data = rbind(scatterplot_data, scatterplot.row) |
351 } | 345 } |
352 link.count = link.count + 1 | 346 link.count = link.count + 1 |
353 } | 347 } |
354 patient.merge.list[[patient]] <<- patientMerge | 348 patient.merge.list[[patient]] <<- patientMerge |
355 patient.merge.list.second[[patient]] <<- merge.list[["second"]] | 349 patient.merge.list.second[[patient]] <<- merge.list[["second"]] |
358 scatterplot_data = merge(scatterplot_data, sample.order, by="type") | 352 scatterplot_data = merge(scatterplot_data, sample.order, by="type") |
359 | 353 |
360 scatter_locus_data_list[[patient]] <<- scatterplot_data | 354 scatter_locus_data_list[[patient]] <<- scatterplot_data |
361 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) | 355 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) |
362 } | 356 } |
363 | 357 |
364 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),] | 358 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),] |
365 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),] | 359 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),] |
366 | 360 |
367 | 361 |
368 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | 362 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) |
369 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony]) | 363 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony]) |
370 res1 = vector() | 364 res1 = vector() |
371 res2 = vector() | 365 res2 = vector() |
375 locussum1 = vector() | 369 locussum1 = vector() |
376 locussum2 = vector() | 370 locussum2 = vector() |
377 | 371 |
378 #for(iter in 1){ | 372 #for(iter in 1){ |
379 for(iter in 1:length(product[,1])){ | 373 for(iter in 1:length(product[,1])){ |
380 threshhold = product[iter,threshholdIndex] | 374 threshhold = product[iter,"interval"] |
381 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 375 V_Segment = paste(".*", as.character(product[iter,"V_Segments"]), ".*", sep="") |
382 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 376 J_Segment = paste(".*", as.character(product[iter,"J_Segments"]), ".*", sep="") |
383 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold | 377 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold |
384 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold | 378 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold |
385 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge)) | 379 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge)) |
386 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge)) | 380 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge)) |
387 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) | 381 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) |
390 res2 = append(res2, sum(two)) | 384 res2 = append(res2, sum(two)) |
391 resBoth = append(resBoth, sum(both)) | 385 resBoth = append(resBoth, sum(both)) |
392 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count)) | 386 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count)) |
393 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count)) | 387 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count)) |
394 #threshhold = 0 | 388 #threshhold = 0 |
395 if(threshhold != 0){ | 389 if(threshhold != 0 | T){ |
396 if(sum(one) > 0){ | 390 if(sum(one) > 0){ |
397 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 391 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] |
398 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 392 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") |
399 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 393 filenameOne = paste(oneSample, "_", product[iter, "Titles"], "_", threshhold, sep="") |
400 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 394 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
401 } | 395 } |
402 if(sum(two) > 0){ | 396 if(sum(two) > 0){ |
403 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 397 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] |
404 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 398 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") |
405 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 399 filenameTwo = paste(twoSample, "_", product[iter, "Titles"], "_", threshhold, sep="") |
406 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 400 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
407 } | 401 } |
408 } else { | 402 } else { |
409 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] | 403 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] |
410 if(nrow(scatterplot_locus_data) > 0){ | 404 if(nrow(scatterplot_locus_data) > 0){ |
411 scatterplot_locus_data$Rearrangement = product[iter, titleIndex] | 405 scatterplot_locus_data$Rearrangement = product[iter, "Titles"] |
412 } | 406 } |
413 | 407 |
414 | 408 |
415 | 409 |
416 p = NULL | 410 p = NULL |
417 print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data))) | 411 #print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data))) |
418 if(nrow(scatterplot_locus_data) != 0){ | 412 if(nrow(scatterplot_locus_data) != 0){ |
419 if(on == "normalized_read_count"){ | 413 if(on == "normalized_read_count"){ |
420 write.table(scatterplot_locus_data, file=paste(oneSample, twoSample, product[iter, titleIndex], "scatterplot_locus_data.txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 414 write.table(scatterplot_locus_data, file=paste(oneSample, twoSample, product[iter, "Titles"], "scatterplot_locus_data.txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
421 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 415 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
422 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales, limits=c(1,1e6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) | 416 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales, limits=c(1,1e6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) |
423 } else { | 417 } else { |
424 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) | 418 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) |
425 } | 419 } |
426 p = p + geom_point(aes(colour=type), position="dodge") | 420 p = p + geom_point(aes(colour=type), position="dodge") |
427 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) | 421 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"])) |
428 } else { | 422 } else { |
429 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) | 423 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, titleIndex])) |
430 } | 424 } |
431 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) | 425 png(paste(patient1[1,"Patient"], "_", patient1[1,"Sample"], "_", patient2[1,"Sample"], "_", onShort, "_", product[iter, "Titles"],"_scatter.png", sep="")) |
432 print(p) | 426 print(p) |
433 dev.off() | 427 dev.off() |
434 } | 428 } |
435 if(sum(both) > 0){ | 429 if(sum(both) > 0){ |
436 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] | 430 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] |
437 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) | 431 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) |
438 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 432 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, "Titles"], "_", threshhold, sep="") |
439 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 433 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
440 } | 434 } |
441 } | 435 } |
442 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2) | 436 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2) |
443 if(sum(is.na(patientResult$percentage)) > 0){ | 437 if(sum(is.na(patientResult$percentage)) > 0){ |
485 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep="")) | 479 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep="")) |
486 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080) | 480 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080) |
487 print(plt) | 481 print(plt) |
488 dev.off() | 482 dev.off() |
489 } | 483 } |
484 | |
490 if(length(patients) > 0){ | 485 if(length(patients) > 0){ |
491 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) | 486 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) |
492 | 487 |
493 interval = intervalFreq | 488 interval = intervalFreq |
494 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 489 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
495 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 490 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
496 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) | 491 for (current_patient in patients){ |
492 print(paste("Started working", unique(current_patient$Patient), "Frequency analysis")) | |
493 patientCountOnColumn(current_patient, product=product, interval=interval, on="Frequency", appendtxt=T) | |
494 } | |
497 | 495 |
498 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) | 496 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) |
499 | 497 |
500 interval = intervalReads | 498 interval = intervalReads |
501 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 499 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
502 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 500 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
503 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") | 501 for (current_patient in patients){ |
502 print(paste("Started working on ", unique(current_patient$Patient), "Read Count analysis")) | |
503 patientCountOnColumn(current_patient, product=product, interval=interval, on="normalized_read_count") | |
504 } | |
504 } | 505 } |
505 if(nrow(single_patients) > 0){ | 506 if(nrow(single_patients) > 0){ |
506 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 507 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
507 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000)) | 508 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000)) |
508 p = p + geom_point(aes(colour=type), position="jitter") | 509 p = p + geom_point(aes(colour=type), position="jitter") |
934 if(sum(not_in_one) > 0){ | 935 if(sum(not_in_one) > 0){ |
935 #scatterplot_locus_data[not_in_one,]$type = "In multiple" | 936 #scatterplot_locus_data[not_in_one,]$type = "In multiple" |
936 } | 937 } |
937 p = NULL | 938 p = NULL |
938 if(nrow(scatterplot_locus_data) != 0){ | 939 if(nrow(scatterplot_locus_data) != 0){ |
939 if(on == "normalized_read_count"){ | 940 filename.scatter = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_scatter_", threshhold, sep="") |
940 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 941 write.table(scatterplot_locus_data, file=paste(filename.scatter, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
941 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6)) | 942 if(on == "normalized_read_count"){ |
943 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | |
944 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6)) | |
945 } else { | |
946 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100)) | |
947 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) | |
948 } | |
949 p = p + geom_point(aes(colour=type), position="jitter") | |
950 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) | |
942 } else { | 951 } else { |
943 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100)) | 952 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) |
944 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) | 953 } |
945 } | |
946 p = p + geom_point(aes(colour=type), position="jitter") | |
947 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) | |
948 } else { | |
949 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) | |
950 } | |
951 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) | 954 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) |
952 print(p) | 955 print(p) |
953 dev.off() | 956 dev.off() |
954 } | 957 } |
955 if(sum(all) > 0){ | 958 if(sum(all) > 0){ |