annotate RScript.r @ 0:ed6885c85660 draft

Uploaded
author davidvanzessen
date Wed, 31 Aug 2016 05:31:47 -0400
parents
children 75853bceec00
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
2 options(scipen=999)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
3
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
4 inFile = args[1]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
5 outDir = args[2]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
6 logfile = args[3]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
7 min_freq = as.numeric(args[4])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
8 min_cells = as.numeric(args[5])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
9 mergeOn = args[6]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
11 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
12
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
13 library(ggplot2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
14 library(reshape2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
15 library(data.table)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
16 library(grid)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
17 library(parallel)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
18 #require(xtable)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
21 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
22 dat$dsPerM = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
23 dat = dat[!is.na(dat$Patient),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
24 dat$Related_to_leukemia_clone = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
25
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
26 setwd(outDir)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
27 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
28 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
29 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
30
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
31 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
32
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
33 dat$Frequency = ((10^dat$Log10_Frequency)*100)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
34
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
35 dat = dat[dat$Frequency >= min_freq,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
36
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
37 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
38
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
39 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
40
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
41 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
42 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
43 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
44
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
45 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
46 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
47
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
48 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
49 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
50 dat = merge(dat, min_cell_count, by="min_cell_paste")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
51 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
52 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
53
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
54 dat = dat[dat$normalized_read_count >= min_cells,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
55
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
56 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
57
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
58 patients = split(dat, dat$Patient, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
59 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
60 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
61 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
62 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
63 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
64 Titles = factor(Titles, levels=Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
65 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
66
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
67 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
68
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
69 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
70 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
71 scatter_locus_data_list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
72 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
73 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
74 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
75 if (!is.data.frame(x) & is.list(x)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
76 x = x[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
77 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
78 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
79 x = data.frame(x,stringsAsFactors=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
80 onShort = "reads"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
81 if(on == "Frequency"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
82 onShort = "freq"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
83 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
84 onx = paste(on, ".x", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
85 ony = paste(on, ".y", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
86 splt = split(x, x$Sample, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
87 type="pair"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
88 if(length(splt) == 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
89 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
90 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
91 type="single"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
92 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
93 patient1 = splt[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
94 patient2 = splt[[2]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
95
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
96 threshholdIndex = which(colnames(product) == "interval")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
97 V_SegmentIndex = which(colnames(product) == "V_Segments")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
98 J_SegmentIndex = which(colnames(product) == "J_Segments")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
99 titleIndex = which(colnames(product) == "Titles")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
100 sampleIndex = which(colnames(x) == "Sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
101 patientIndex = which(colnames(x) == "Patient")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
102 oneSample = paste(patient1[1,sampleIndex], sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
103 twoSample = paste(patient2[1,sampleIndex], sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
104 patient = paste(x[1,patientIndex])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
105
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
106 switched = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
107 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
108 tmp = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
109 twoSample = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
110 oneSample = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
111 tmp = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
112 patient1 = patient2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
113 patient2 = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
114 switched = T
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
115 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
116 if(appendtxt){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
117 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
118 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
119 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
120
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
121 if(mergeOn == "Clone_Sequence"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
122 patient1$merge = paste(patient1$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
123 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
124 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
125 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
126 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
127 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
128
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
129 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
130 #scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
131 scatterplot_data = patient1[NULL,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
132 #scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
133 #scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
134 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
135 #scatterplot_data$type = factor(x=NULL, levels=scatterplot.data.type.factor)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
136 scatterplot_data$type = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
137 scatterplot_data$link = numeric(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
138 scatterplot_data$on = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
139
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
140 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
141 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
142
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
143 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
144
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
145 start.time = proc.time()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
146 merge.list = c()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
147
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
148 if(patient %in% names(patient.merge.list)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
149 patientMerge = patient.merge.list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
150 merge.list[["second"]] = patient.merge.list.second[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
151 scatterplot_data = scatter_locus_data_list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
152 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
153
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
154 print(names(patient.merge.list))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
155 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
156 #fuzzy matching here...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
157 #merge.list = patientMerge$merge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
158
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
159 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
160 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
161
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
162 patient1.fuzzy = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
163 patient2.fuzzy = patient2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
164
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
165 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
166 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
167
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
168 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
169 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
170
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
171 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
172 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
173
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
174 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
175 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
176
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
177 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
178 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
179
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
180 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
181 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
182
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
183 merge.list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
184
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
185 merge.list[["second"]] = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
186
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
187 link.count = 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
188
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
189 while(nrow(patient.fuzzy) > 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
190 first.merge = patient.fuzzy[1,"merge"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
191 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
192 first.sample = patient.fuzzy[1,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
193 merge.filter = first.merge == patient.fuzzy$merge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
194
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
195 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
196
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
197 first.sample.filter = first.sample == patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
198 second.sample.filter = first.sample != patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
199
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
200 #first match same sample, sum to a single row, same for other sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
201 #then merge rows like 'normal'
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
202
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
203 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
204
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
205
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
206
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
207 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
208 first.match.filter = merge.filter & sequence.filter & first.sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
209 second.match.filter = merge.filter & sequence.filter & second.sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
210
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
211 first.rows = patient.fuzzy[first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
212 second.rows = patient.fuzzy[second.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
213
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
214 first.rows.v = table(first.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
215 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
216 first.rows.j = table(first.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
217 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
218
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
219 first.sum = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
220 Patient = patient,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
221 Receptor = first.rows[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
222 Sample = first.rows[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
223 Cell_Count = first.rows[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
224 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
225 Log10_Frequency = log10(sum(first.rows$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
226 Total_Read_Count = sum(first.rows$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
227 dsPerM = sum(first.rows$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
228 J_Segment_Major_Gene = first.rows.j,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
229 V_Segment_Major_Gene = first.rows.v,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
230 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
231 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
232 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
233 Frequency = sum(first.rows$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
234 locus_V = first.rows[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
235 locus_J = first.rows[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
236 min_cell_count = first.rows[1,"min_cell_count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
237 normalized_read_count = sum(first.rows$normalized_read_count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
238 paste = first.rows[1,"paste"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
239 min_cell_paste = first.rows[1,"min_cell_paste"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
240
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
241 if(nrow(second.rows) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
242 second.rows.v = table(second.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
243 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
244 second.rows.j = table(second.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
245 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
246
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
247 second.sum = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
248 Patient = patient,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
249 Receptor = second.rows[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
250 Sample = second.rows[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
251 Cell_Count = second.rows[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
252 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
253 Log10_Frequency = log10(sum(second.rows$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
254 Total_Read_Count = sum(second.rows$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
255 dsPerM = sum(second.rows$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
256 J_Segment_Major_Gene = second.rows.j,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
257 V_Segment_Major_Gene = second.rows.v,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
258 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
259 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
260 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
261 Frequency = sum(second.rows$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
262 locus_V = second.rows[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
263 locus_J = second.rows[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
264 min_cell_count = second.rows[1,"min_cell_count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
265 normalized_read_count = sum(second.rows$normalized_read_count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
266 paste = second.rows[1,"paste"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
267 min_cell_paste = second.rows[1,"min_cell_paste"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
268
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
269 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
270 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
271
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
272 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
273 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
274
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
275 tmp.rows = rbind(first.rows, second.rows)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
276 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
277
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
278
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
279 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
280 scatterplot.row = first.sum[,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
281 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
282 scatterplot.row$link = link.count
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
283 scatterplot.row$on = onShort
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
284
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
285 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
286
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
287 scatterplot.row = second.sum[,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
288 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
289 scatterplot.row$link = link.count
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
290 scatterplot.row$on = onShort
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
291
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
292 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
293
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
294 #write some information about the match to a log file
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
295 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
296 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
297 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
298 second.clone.sequence = second.rows[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
299 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
300 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
301 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
302 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
303 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
304 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
305
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
306 } else if(nrow(first.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
307 if(patient1[1,"Sample"] == first.sample){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
308 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
309 patient1 = rbind(patient1, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
310 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
311 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
312 patient2 = rbind(patient2, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
313 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
314
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
315 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
316 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
317
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
318 patient.fuzzy = patient.fuzzy[-first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
319
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
320 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
321 scatterplot.row = first.sum[,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
322 scatterplot.row$type = first.sum[,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
323 scatterplot.row$link = link.count
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
324 scatterplot.row$on = onShort
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
325
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
326 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
327
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
328 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
329 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
330 patient.fuzzy = patient.fuzzy[-1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
331
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
332 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
333 scatterplot.row = first.sum[,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
334 scatterplot.row$type = first.sum[,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
335 scatterplot.row$link = link.count
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
336 scatterplot.row$on = onShort
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
337
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
338 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
339 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
340 link.count = link.count + 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
341 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
342 patient.merge.list[[patient]] <<- patientMerge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
343 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
344
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
345 sample.order = data.frame(type = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")),type.order = 1:4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
346 scatterplot_data = merge(scatterplot_data, sample.order, by="type")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
347
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
348 scatter_locus_data_list[[patient]] <<- scatterplot_data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
349 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
350 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
351
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
352 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
353 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
354
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
355
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
356 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
357 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
358 res1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
359 res2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
360 resBoth = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
361 read1Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
362 read2Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
363 locussum1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
364 locussum2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
365
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
366 #for(iter in 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
367 for(iter in 1:length(product[,1])){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
368 threshhold = product[iter,threshholdIndex]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
369 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
370 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
371 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
372 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
373 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
374 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
375 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
376 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
377 res1 = append(res1, sum(one))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
378 res2 = append(res2, sum(two))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
379 resBoth = append(resBoth, sum(both))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
380 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
381 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
382 #threshhold = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
383 if(threshhold != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
384 if(sum(one) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
385 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
386 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
387 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
388 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
389 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
390 if(sum(two) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
391 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
392 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
393 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
394 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
395 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
396 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
397 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
398 if(nrow(scatterplot_locus_data) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
399 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
400 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
401
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
402
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
403
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
404 p = NULL
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
405 print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
406 if(nrow(scatterplot_locus_data) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
407 if(on == "normalized_read_count"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
408 write.table(scatterplot_locus_data, file=paste(oneSample, twoSample, product[iter, titleIndex], "scatterplot_locus_data.txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
409 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
410 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales, limits=c(1,1e6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
411 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
412 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
413 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
414 p = p + geom_point(aes(colour=type), position="dodge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
415 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
416 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
417 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
418 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
419 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
420 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
421 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
422 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
423 if(sum(both) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
424 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
425 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
426 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
427 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
428 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
429 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
430 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
431 if(sum(is.na(patientResult$percentage)) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
432 patientResult[is.na(patientResult$percentage),]$percentage = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
433 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
434 colnames(patientResult)[6] = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
435 colnames(patientResult)[8] = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
436 colnamesBak = colnames(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
437 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
438 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
439 colnames(patientResult) = colnamesBak
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
440
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
441 patientResult$Locus = factor(patientResult$Locus, Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
442 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
443
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
444 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
445 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
446 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
447 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
448 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
449 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
450 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
451 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
452 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
453 #(t,r,b,l)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
454 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
455 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
456 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
457 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
458 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
459 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
460 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
461 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
462 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
463
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
464 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
465 patientResult$relativeValue = patientResult$value * 10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
466 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
467 plt = ggplot(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
468 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
469 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
470 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
471 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
472 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
473 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
474 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
475 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
476 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
477 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
478
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
479 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
480
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
481 interval = intervalFreq
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
482 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
483 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
484 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
485
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
486 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
487
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
488 interval = intervalReads
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
489 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
490 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
491 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
492
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
493 if(nrow(single_patients) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
494 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
495 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
496 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
497 p = p + xlab("In one or both samples") + ylab("Reads")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
498 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
499 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
500 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
501 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
502
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
503 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
504 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
505 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
506 p = p + xlab("In one or both samples") + ylab("Frequency")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
507 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
508 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
509 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
510 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
511 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
512 empty <- data.frame()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
513 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
514
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
515 png("singles_reads_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
516 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
517 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
518
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
519 png("singles_freq_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
520 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
521 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
522 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
523
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
524 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
525 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
526
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
527 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
528 onShort = "reads"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
529 if(on == "Frequency"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
530 onShort = "freq"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
531 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
532 onx = paste(on, ".x", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
533 ony = paste(on, ".y", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
534 onz = paste(on, ".z", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
535 type="triplet"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
536
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
537 threshholdIndex = which(colnames(product) == "interval")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
538 V_SegmentIndex = which(colnames(product) == "V_Segments")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
539 J_SegmentIndex = which(colnames(product) == "J_Segments")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
540 titleIndex = which(colnames(product) == "Titles")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
541 sampleIndex = which(colnames(patient1) == "Sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
542 patientIndex = which(colnames(patient1) == "Patient")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
543 oneSample = paste(patient1[1,sampleIndex], sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
544 twoSample = paste(patient2[1,sampleIndex], sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
545 threeSample = paste(patient3[1,sampleIndex], sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
546
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
547 if(mergeOn == "Clone_Sequence"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
548 patient1$merge = paste(patient1$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
549 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
550 patient3$merge = paste(patient3$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
551
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
552 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
553 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
554 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
555 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
556 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
557
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
558 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
559 patient1.fuzzy = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
560 patient2.fuzzy = patient2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
561 patient3.fuzzy = patient3
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
562
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
563 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
564
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
565 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
566 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
567 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
568
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
569 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
570 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
571
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
572 other.sample.list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
573 other.sample.list[[oneSample]] = c(twoSample, threeSample)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
574 other.sample.list[[twoSample]] = c(oneSample, threeSample)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
575 other.sample.list[[threeSample]] = c(oneSample, twoSample)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
576
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
577 patientMerge = merge(patient1, patient2, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
578 patientMerge = merge(patientMerge, patient3, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
579 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
580 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
581 patientMerge = patientMerge[NULL,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
582
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
583 duo.merge.list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
584
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
585 patientMerge12 = merge(patient1, patient2, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
586 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
587 patientMerge12 = patientMerge12[NULL,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
588 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
589 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
590
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
591 patientMerge13 = merge(patient1, patient3, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
592 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
593 patientMerge13 = patientMerge13[NULL,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
594 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
595 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
596
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
597 patientMerge23 = merge(patient2, patient3, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
598 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
599 patientMerge23 = patientMerge23[NULL,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
600 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
601 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
602
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
603 merge.list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
604 merge.list[["second"]] = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
605
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
606 start.time = proc.time()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
607 if(paste(label1, "123") %in% names(patient.merge.list)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
608 patientMerge = patient.merge.list[[paste(label1, "123")]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
609 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
610 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
611 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
612
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
613 merge.list[["second"]] = patient.merge.list.second[[label1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
614
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
615 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
616 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
617 while(nrow(patient.fuzzy) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
618 first.merge = patient.fuzzy[1,"merge"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
619 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
620 first.sample = patient.fuzzy[1,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
621
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
622 merge.filter = first.merge == patient.fuzzy$merge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
623
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
624 second.sample = other.sample.list[[first.sample]][1]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
625 third.sample = other.sample.list[[first.sample]][2]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
626
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
627 sample.filter.1 = first.sample == patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
628 sample.filter.2 = second.sample == patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
629 sample.filter.3 = third.sample == patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
630
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
631 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
632
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
633 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
634 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
635 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
636
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
637 matches.in.1 = any(match.filter.1)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
638 matches.in.2 = any(match.filter.2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
639 matches.in.3 = any(match.filter.3)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
640
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
641
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
642
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
643 rows.1 = patient.fuzzy[match.filter.1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
644
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
645 sum.1 = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
646 Patient = label1,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
647 Receptor = rows.1[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
648 Sample = rows.1[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
649 Cell_Count = rows.1[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
650 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
651 Log10_Frequency = log10(sum(rows.1$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
652 Total_Read_Count = sum(rows.1$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
653 dsPerM = sum(rows.1$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
654 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
655 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
656 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
657 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
658 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
659 Frequency = sum(rows.1$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
660 locus_V = rows.1[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
661 locus_J = rows.1[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
662 uniqueID = rows.1[1,"uniqueID"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
663 normalized_read_count = sum(rows.1$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
664 sum.2 = sum.1[NULL,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
665 rows.2 = patient.fuzzy[match.filter.2,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
666 if(matches.in.2){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
667 sum.2 = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
668 Patient = label1,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
669 Receptor = rows.2[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
670 Sample = rows.2[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
671 Cell_Count = rows.2[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
672 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
673 Log10_Frequency = log10(sum(rows.2$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
674 Total_Read_Count = sum(rows.2$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
675 dsPerM = sum(rows.2$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
676 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
677 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
678 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
679 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
680 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
681 Frequency = sum(rows.2$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
682 locus_V = rows.2[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
683 locus_J = rows.2[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
684 uniqueID = rows.2[1,"uniqueID"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
685 normalized_read_count = sum(rows.2$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
686 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
687
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
688 sum.3 = sum.1[NULL,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
689 rows.3 = patient.fuzzy[match.filter.3,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
690 if(matches.in.3){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
691 sum.3 = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
692 Patient = label1,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
693 Receptor = rows.3[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
694 Sample = rows.3[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
695 Cell_Count = rows.3[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
696 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
697 Log10_Frequency = log10(sum(rows.3$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
698 Total_Read_Count = sum(rows.3$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
699 dsPerM = sum(rows.3$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
700 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
701 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
702 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
703 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
704 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
705 Frequency = sum(rows.3$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
706 locus_V = rows.3[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
707 locus_J = rows.3[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
708 uniqueID = rows.3[1,"uniqueID"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
709 normalized_read_count = sum(rows.3$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
710 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
711
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
712 if(matches.in.2 & matches.in.3){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
713 merge.123 = merge(sum.1, sum.2, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
714 merge.123 = merge(merge.123, sum.3, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
715 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
716 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
717
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
718 patientMerge = rbind(patientMerge, merge.123)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
719 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
720
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
721 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
722 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
723
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
724 } else if (matches.in.2) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
725 #other.sample1 = other.sample.list[[first.sample]][1]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
726 #other.sample2 = other.sample.list[[first.sample]][2]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
727
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
728 second.sample = sum.2[,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
729
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
730 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
731
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
732 merge.12 = merge(sum.1, sum.2, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
733
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
734 current.merge.list = rbind(current.merge.list, merge.12)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
735 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
736
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
737 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
738
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
739 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
740 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
741
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
742 } else if (matches.in.3) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
743
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
744 #other.sample1 = other.sample.list[[first.sample]][1]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
745 #other.sample2 = other.sample.list[[first.sample]][2]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
746
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
747 second.sample = sum.3[,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
748
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
749 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
750
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
751 merge.13 = merge(sum.1, sum.3, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
752
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
753 current.merge.list = rbind(current.merge.list, merge.13)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
754 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
755
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
756 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
757
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
758 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
759 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
760
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
761 } else if(nrow(rows.1) > 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
762 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
763 print(names(patient1)[names(patient1) %in% sum.1])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
764 print(names(patient1)[!(names(patient1) %in% sum.1)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
765 print(names(patient1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
766 print(names(sum.1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
767 print(summary(sum.1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
768 print(summary(patient1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
769 print(dim(sum.1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
770 print(dim(patient1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
771 print(head(sum.1[,names(patient1)]))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
772 patient1 = rbind(patient1, sum.1[,names(patient1)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
773 patient.fuzzy = patient.fuzzy[-match.filter.1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
774 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
775 patient.fuzzy = patient.fuzzy[-1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
776 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
777
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
778 tmp.rows = rbind(rows.1, rows.2, rows.3)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
779 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
780
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
781 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
782 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
783 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
784 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
785
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
786 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
787 patient.merge.list[[paste(label1, "123")]] = patientMerge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
788
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
789 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
790 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
791 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
792
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
793 patient.merge.list[[paste(label1, "12")]] = patientMerge12
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
794 patient.merge.list[[paste(label1, "13")]] = patientMerge13
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
795 patient.merge.list[[paste(label1, "23")]] = patientMerge23
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
796
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
797 patient.merge.list.second[[label1]] = merge.list[["second"]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
798 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
799 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
800 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
801 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
802 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
803 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
804
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
805 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
806 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
807 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
808 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
809
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
810 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
811 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
812 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
813
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
814 if(F){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
815 patientMerge = merge(patient1, patient2, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
816 patientMerge = merge(patientMerge, patient3, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
817 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
818 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
819 patientMerge12 = merge(patient1, patient2, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
820 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
821 patientMerge13 = merge(patient1, patient3, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
822 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
823 patientMerge23 = merge(patient2, patient3, by="merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
824 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
825 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
826
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
827 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
828 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
829 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
830 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
831
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
832 res1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
833 res2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
834 res3 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
835 res12 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
836 res13 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
837 res23 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
838 resAll = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
839 read1Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
840 read2Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
841 read3Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
842
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
843 if(appendTriplets){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
844 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
845 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
846 for(iter in 1:length(product[,1])){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
847 threshhold = product[iter,threshholdIndex]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
848 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
849 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
850 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
851 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
852
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
853 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
854 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
855 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
856
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
857 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
858 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
859 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
860
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
861 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
862 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
863 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
864 res1 = append(res1, sum(one))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
865 res2 = append(res2, sum(two))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
866 res3 = append(res3, sum(three))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
867 resAll = append(resAll, sum(all))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
868 res12 = append(res12, sum(one_two))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
869 res13 = append(res13, sum(one_three))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
870 res23 = append(res23, sum(two_three))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
871 #threshhold = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
872 if(threshhold != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
873 if(sum(one) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
874 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
875 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
876 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
877 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
878 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
879 if(sum(two) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
880 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
881 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
882 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
883 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
884 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
885 if(sum(three) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
886 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
887 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
888 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
889 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
890 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
891 if(sum(one_two) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
892 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
893 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
894 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
895 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
896 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
897 if(sum(one_three) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
898 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
899 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
900 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
901 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
902 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
903 if(sum(two_three) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
904 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
905 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
906 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
907 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
908 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
909 } else { #scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
910 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
911 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
912 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
913 if(sum(in_two) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
914 scatterplot_locus_data[in_two,]$type = "In two"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
915 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
916 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
917 if(sum(in_three)> 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
918 scatterplot_locus_data[in_three,]$type = "In three"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
919 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
920 not_in_one = scatterplot_locus_data$type != "In one"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
921 if(sum(not_in_one) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
922 #scatterplot_locus_data[not_in_one,]$type = "In multiple"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
923 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
924 p = NULL
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
925 if(nrow(scatterplot_locus_data) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
926 if(on == "normalized_read_count"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
927 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
928 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
929 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
930 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
931 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
932 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
933 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
934 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
935 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
936 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
937 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
938 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
939 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
940 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
941 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
942 if(sum(all) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
943 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
944 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
945 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
946 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
947 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
948 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
949 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
950 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
951 colnames(patientResult)[6] = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
952 colnames(patientResult)[7] = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
953 colnames(patientResult)[8] = threeSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
954 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
955 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
956 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
957
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
958 colnamesBak = colnames(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
959 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
960 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
961 colnames(patientResult) = colnamesBak
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
962
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
963 patientResult$Locus = factor(patientResult$Locus, Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
964 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
965
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
966 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
967 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
968 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
969 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
970 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
971 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
972 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
973 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
974 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
975
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
976 fontSize = 4
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
977
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
978 bak = patientResult
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
979 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
980 patientResult$relativeValue = patientResult$value * 10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
981 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
982 plt = ggplot(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
983 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
984 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
985 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
986 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
987 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
988 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
989 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
990 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
991 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
992 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
993 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
994
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
995 if(nrow(triplets) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
996
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
997 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
998
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
999 triplets$uniqueID = "ID"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1000
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1001 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1002 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1003 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1004
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1005 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1006 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1007 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1008
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1009 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1010
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1011 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1012
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1013 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1014 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1015 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1016
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1017 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1018 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1019
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1020 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1021
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1022 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1023
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1024 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1025
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1026 triplets = triplets[triplets$normalized_read_count >= min_cells,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1027
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1028 column_drops = c("min_cell_count", "min_cell_paste")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1029
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1030 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1031
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1032 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1033
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1034 interval = intervalReads
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1035 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1036 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1037
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1038 one = triplets[triplets$Sample == "14696_reg_BM",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1039 two = triplets[triplets$Sample == "24536_reg_BM",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1040 three = triplets[triplets$Sample == "24062_reg_BM",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1041 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1042
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1043 one = triplets[triplets$Sample == "16278_Left",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1044 two = triplets[triplets$Sample == "26402_Left",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1045 three = triplets[triplets$Sample == "26759_Left",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1046 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1047
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1048 one = triplets[triplets$Sample == "16278_Right",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1049 two = triplets[triplets$Sample == "26402_Right",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1050 three = triplets[triplets$Sample == "26759_Right",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1051 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1052
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1053 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1054
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1055 interval = intervalFreq
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1056 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1057 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1058
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1059 one = triplets[triplets$Sample == "14696_reg_BM",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1060 two = triplets[triplets$Sample == "24536_reg_BM",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1061 three = triplets[triplets$Sample == "24062_reg_BM",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1062 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1063
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1064 one = triplets[triplets$Sample == "16278_Left",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1065 two = triplets[triplets$Sample == "26402_Left",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1066 three = triplets[triplets$Sample == "26759_Left",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1067 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1068
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1069 one = triplets[triplets$Sample == "16278_Right",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1070 two = triplets[triplets$Sample == "26402_Right",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1071 three = triplets[triplets$Sample == "26759_Right",]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1072 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1073 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1074 cat("", file="triplets.txt")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1075 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1076 cat("</table></html>", file=logfile, append=T)