annotate RScript.r @ 8:eb2aa7cffca3 draft default tip

Uploaded
author davidvanzessen
date Wed, 13 Dec 2017 06:00:44 -0500
parents 7ce82833977c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
2 options(scipen=999)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
3
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
4 inFile = args[1]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
5 outDir = args[2]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
6 logfile = args[3]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
7 min_freq = as.numeric(args[4])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
8 min_cells = as.numeric(args[5])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
9 mergeOn = args[6]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
11 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
12
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
13 library(ggplot2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
14 library(reshape2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
15 library(data.table)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
16 library(grid)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
17 library(parallel)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
18 #require(xtable)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
21
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
22 needed_cols = c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
23 if(!all(needed_cols %in% names(dat))){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
24 cat("Missing column(s):<br />", file=logfile, append=F)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
25 missing_cols = needed_cols[!(needed_cols %in% names(dat))]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
26 for(col in missing_cols){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
27 cat(paste(col, "<br />"), file=logfile, append=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
28 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
29 stop("Not all columns are present")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
30 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
31
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
32 if(!("Total_Read_Count" %in% names(dat))){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
33 dat$Total_Read_Count = 0
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
34 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
35
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
36 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")]
7
7ce82833977c Uploaded
davidvanzessen
parents: 6
diff changeset
37 dat = dat[!(nchar(dat$Clone_Sequence) < 2),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
38
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
39 dat$dsPerM = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
40 dat = dat[!is.na(dat$Patient),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
41 dat$Related_to_leukemia_clone = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
42
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
43 setwd(outDir)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
44 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
45 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
46 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
47
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
48 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
49
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
50 dat$Frequency = ((10^dat$Log10_Frequency)*100)
5
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
51 dat = dat[dat$Frequency <= 100,] #just in case?
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
52
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
53 dat = dat[dat$Frequency >= min_freq,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
54
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
55 patient.sample.counts = data.frame(data.table(dat)[, list(count=.N), by=c("Patient", "Sample")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
56 patient.sample.counts = data.frame(data.table(patient.sample.counts)[, list(count=.N), by=c("Patient")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
57
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
58 print("Found the following patients with number of samples:")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
59 print(patient.sample.counts)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
60
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
61 patient.sample.counts.pairs = patient.sample.counts[patient.sample.counts$count %in% 1:2,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
62 patient.sample.counts.triplets = patient.sample.counts[patient.sample.counts$count == 3,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
63
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
64
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
65
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
66 triplets = dat[dat$Patient %in% patient.sample.counts.triplets$Patient,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
67 dat = dat[dat$Patient %in% patient.sample.counts.pairs$Patient,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
68
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
69 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
70
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
71 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
72 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
73 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
74
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
75 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
76 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
77
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
78 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
79 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
80 dat = merge(dat, min_cell_count, by="min_cell_paste")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
81 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
82 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
83
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
84 dat = dat[dat$normalized_read_count >= min_cells,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
85
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
86 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
87
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
88 patients = split(dat, dat$Patient, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
89 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
90 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
91 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
92 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
93 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
94 Titles = factor(Titles, levels=Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
95 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
96
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
97 single_patients = dat[NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
98
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
99 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
100 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
101 scatter_locus_data_list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
102 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
103 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
104 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
105 if (!is.data.frame(x) & is.list(x)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
106 x = x[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
107 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
108 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
109 x = data.frame(x,stringsAsFactors=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
110 onShort = "reads"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
111 if(on == "Frequency"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
112 onShort = "freq"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
113 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
114 onx = paste(on, ".x", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
115 ony = paste(on, ".y", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
116 splt = split(x, x$Sample, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
117 type="pair"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
118 if(length(splt) == 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
119 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
120 splt[[2]] = splt[[1]][NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
121 type="single"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
122 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
123 patient1 = splt[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
124 patient2 = splt[[2]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
125
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
126 oneSample = patient1[1,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
127 twoSample = patient2[1,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
128 patient = x[1,"Patient"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
129
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
130 switched = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
131 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
132 tmp = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
133 twoSample = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
134 oneSample = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
135 tmp = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
136 patient1 = patient2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
137 patient2 = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
138 switched = T
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
139 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
140 if(appendtxt){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
141 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
142 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
143 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
144
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
145 if(mergeOn == "Clone_Sequence"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
146 patient1$merge = paste(patient1$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
147 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
148 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
149 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
150 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
151 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
152
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
153 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
154 scatterplot_data = patient1[NULL,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
155 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
156 scatterplot_data$type = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
157 scatterplot_data$link = numeric(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
158 scatterplot_data$on = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
159
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
160 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
161
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
162 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
163
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
164 start.time = proc.time()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
165 merge.list = c()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
166
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
167 if(patient %in% names(patient.merge.list)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
168 patientMerge = patient.merge.list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
169 merge.list[["second"]] = patient.merge.list.second[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
170 scatterplot_data = scatter_locus_data_list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
171 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
172
5
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
173 #print(names(patient.merge.list))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
174 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
175 #fuzzy matching here...
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
176
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
177 patient1.fuzzy = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
178 patient2.fuzzy = patient2
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
179
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
180 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
181 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
182
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
183 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
184 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
185
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
186 merge.list = list()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
187
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
188 merge.list[["second"]] = vector()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
189
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
190 link.count = 1
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
191
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
192 while(nrow(patient.fuzzy) > 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
193 first.merge = patient.fuzzy[1,"merge"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
194 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
195 first.sample = patient.fuzzy[1,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
196 merge.filter = first.merge == patient.fuzzy$merge
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
197
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
198 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
199
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
200 first.sample.filter = first.sample == patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
201 second.sample.filter = first.sample != patient.fuzzy$Sample
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
202
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
203 #first match same sample, sum to a single row, same for other sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
204 #then merge rows like 'normal'
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
205
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
206 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
207
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
208
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
209
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
210 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
211 first.match.filter = merge.filter & sequence.filter & first.sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
212 second.match.filter = merge.filter & sequence.filter & second.sample.filter
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
213
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
214 first.rows = patient.fuzzy[first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
215 second.rows = patient.fuzzy[second.match.filter,]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
216
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
217 first.rows.v = table(first.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
218 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
219 first.rows.j = table(first.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
220 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
221
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
222 first.sum = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
223 Patient = patient,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
224 Receptor = first.rows[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
225 Sample = first.rows[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
226 Cell_Count = first.rows[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
227 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
228 Log10_Frequency = log10(sum(first.rows$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
229 Total_Read_Count = sum(first.rows$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
230 dsPerM = sum(first.rows$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
231 J_Segment_Major_Gene = first.rows.j,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
232 V_Segment_Major_Gene = first.rows.v,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
233 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
234 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
235 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
236 Frequency = sum(first.rows$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
237 locus_V = first.rows[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
238 locus_J = first.rows[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
239 min_cell_count = first.rows[1,"min_cell_count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
240 normalized_read_count = sum(first.rows$normalized_read_count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
241 paste = first.rows[1,"paste"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
242 min_cell_paste = first.rows[1,"min_cell_paste"])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
243
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
244 if(nrow(second.rows) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
245 second.rows.v = table(second.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
246 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
247 second.rows.j = table(second.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
248 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
249
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
250 second.sum = data.frame(merge = first.clone.sequence,
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
251 Patient = patient,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
252 Receptor = second.rows[1,"Receptor"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
253 Sample = second.rows[1,"Sample"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
254 Cell_Count = second.rows[1,"Cell_Count"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
255 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
256 Log10_Frequency = log10(sum(second.rows$Frequency)),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
257 Total_Read_Count = sum(second.rows$Total_Read_Count),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
258 dsPerM = sum(second.rows$dsPerM),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
259 J_Segment_Major_Gene = second.rows.j,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
260 V_Segment_Major_Gene = second.rows.v,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
261 Clone_Sequence = first.clone.sequence,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
262 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
263 Related_to_leukemia_clone = F,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
264 Frequency = sum(second.rows$Frequency),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
265 locus_V = second.rows[1,"locus_V"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
266 locus_J = second.rows[1,"locus_J"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
267 min_cell_count = second.rows[1,"min_cell_count"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
268 normalized_read_count = sum(second.rows$normalized_read_count),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
269 paste = second.rows[1,"paste"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
270 min_cell_paste = second.rows[1,"min_cell_paste"])
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
271
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
272 #print(names(patientMerge))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
273 #print(merge(first.sum, second.sum, by="merge"))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
274 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
275 #print("test2")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
276 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
277
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
278 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
279 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
280
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
281 tmp.rows = rbind(first.rows, second.rows)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
282 #print("test3")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
283 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
284
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
285
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
286 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
287 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
288 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
289 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
290 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
291
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
292 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
293
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
294 scatterplot.row = second.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
295 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
296 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
297 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
298
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
299 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
300
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
301 #write some information about the match to a log file
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
302 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
303 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
304 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
305 second.clone.sequence = second.rows[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
306 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
307 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
308 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
309 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
310 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
311 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
312
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
313 } else if(nrow(first.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
314 if(patient1[1,"Sample"] == first.sample){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
315 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
316 patient1 = rbind(patient1, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
317 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
318 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
319 patient2 = rbind(patient2, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
320 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
321
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
322 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
323 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
324
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
325 patient.fuzzy = patient.fuzzy[-first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
326
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
327 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
328 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
329 scatterplot.row$type = first.sum[,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
330 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
331 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
332
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
333 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
334
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
335 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
336 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
337 patient.fuzzy = patient.fuzzy[-1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
338
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
339 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
340 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
341 scatterplot.row$type = first.sum[,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
342 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
343 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
344
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
345 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
346 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
347 link.count = link.count + 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
348 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
349 patient.merge.list[[patient]] <<- patientMerge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
350 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
351
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
352 sample.order = data.frame(type = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")),type.order = 1:4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
353 scatterplot_data = merge(scatterplot_data, sample.order, by="type")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
354
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
355 scatter_locus_data_list[[patient]] <<- scatterplot_data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
356 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
357 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
358
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
359 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
360 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
361
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
362
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
363 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
364 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
365 res1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
366 res2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
367 resBoth = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
368 read1Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
369 read2Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
370 locussum1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
371 locussum2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
372
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
373 #for(iter in 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
374 for(iter in 1:length(product[,1])){
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
375 threshhold = product[iter,"interval"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
376 V_Segment = paste(".*", as.character(product[iter,"V_Segments"]), ".*", sep="")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
377 J_Segment = paste(".*", as.character(product[iter,"J_Segments"]), ".*", sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
378 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
379 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
380 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
381 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
382 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
383 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
384 res1 = append(res1, sum(one))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
385 res2 = append(res2, sum(two))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
386 resBoth = append(resBoth, sum(both))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
387 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
388 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
389 #threshhold = 0
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
390 if(threshhold != 0 | T){
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
391 if(sum(one) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
392 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
393 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
394 filenameOne = paste(oneSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
395 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
396 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
397 if(sum(two) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
398 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
399 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
400 filenameTwo = paste(twoSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
401 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
402 }
6
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
403 }
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
404 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
405 if(nrow(scatterplot_locus_data) > 0){
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
406 scatterplot_locus_data$Rearrangement = product[iter, "Titles"]
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
407 }
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
408 p = NULL
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
409 #print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data)))
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
410 if(nrow(scatterplot_locus_data) != 0){
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
411 if(on == "normalized_read_count"){
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
412 write.table(scatterplot_locus_data, file=paste(oneSample, twoSample, product[iter, "Titles"], "scatterplot_locus_data.txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
413 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
414 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales, limits=c(1,1e6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
415 } else {
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
416 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
417 }
6
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
418 p = p + geom_point(aes(colour=type), position="dodge")
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
419 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"]))
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
420 } else {
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
421 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"]))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
422 }
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
423 file_name = paste(patient1[1,"Patient"], "_", patient1[1,"Sample"], "_", patient2[1,"Sample"], "_", onShort, "_", product[iter, "Titles"],"_scatter.png", sep="")
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
424 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
425 png(file_name)
6
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
426 print(p)
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
427 dev.off()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
428 if(sum(both) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
429 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
430 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
431 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
432 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
433 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
434 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
435 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
436 if(sum(is.na(patientResult$percentage)) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
437 patientResult[is.na(patientResult$percentage),]$percentage = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
438 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
439 colnames(patientResult)[6] = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
440 colnames(patientResult)[8] = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
441 colnamesBak = colnames(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
442 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
443 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
444 colnames(patientResult) = colnamesBak
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
445
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
446 patientResult$Locus = factor(patientResult$Locus, Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
447 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
448
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
449 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
450 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
451 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
452 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
453 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
454 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
455 file_name = paste(patient, "_", onShort, ".png", sep="")
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
456 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
457 png(file_name, width=1920, height=1080)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
458 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
459 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
460 #(t,r,b,l)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
461 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
462 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
463 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
464 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
465 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
466 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
467
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
468 file_name = paste(patient, "_percent_", onShort, ".png", sep="")
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
469 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
470 png(file_name, width=1920, height=1080)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
471 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
472 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
473
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
474 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
475 patientResult$relativeValue = patientResult$value * 10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
476 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
477 plt = ggplot(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
478 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
479 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
480 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
481 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
482 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
483 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
484 file_name = paste(patient, "_", onShort, "_both.png", sep="")
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
485 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
486 png(file_name, width=1920, height=1080)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
487 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
488 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
489 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
490
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
491 if(length(patients) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
492 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
493
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
494 interval = intervalFreq
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
495 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
496 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
497 for (current_patient in patients){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
498 print(paste("Started working", unique(current_patient$Patient), "Frequency analysis"))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
499 patientCountOnColumn(current_patient, product=product, interval=interval, on="Frequency", appendtxt=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
500 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
501
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
502 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
503
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
504 interval = intervalReads
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
505 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
506 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
507 for (current_patient in patients){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
508 print(paste("Started working on ", unique(current_patient$Patient), "Read Count analysis"))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
509 patientCountOnColumn(current_patient, product=product, interval=interval, on="normalized_read_count")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
510 }
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
511 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
512 if(nrow(single_patients) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
513 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
514 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
515 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
516 p = p + xlab("In one or both samples") + ylab("Reads")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
517 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
518 file_name = "singles_reads_scatterplot.png"
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
519 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
520 png(file_name, width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
521 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
522 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
523
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
524 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
525 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
526 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
527 p = p + xlab("In one or both samples") + ylab("Frequency")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
528 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
529 file_name = "singles_freq_scatterplot.png"
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
530 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
531 png(file_name, width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
532 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
533 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
534 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
535 empty <- data.frame()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
536 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
537
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
538 png("singles_reads_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
539 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
540 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
541
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
542 png("singles_freq_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
543 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
544 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
545 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
546
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
547 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
548 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
549
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
550 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
551 onShort = "reads"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
552 if(on == "Frequency"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
553 onShort = "freq"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
554 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
555 onx = paste(on, ".x", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
556 ony = paste(on, ".y", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
557 onz = paste(on, ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
558 type="triplet"
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
559
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
560 threshholdIndex = which(colnames(product) == "interval")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
561 V_SegmentIndex = which(colnames(product) == "V_Segments")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
562 J_SegmentIndex = which(colnames(product) == "J_Segments")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
563 titleIndex = which(colnames(product) == "Titles")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
564 sampleIndex = which(colnames(patient1) == "Sample")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
565 patientIndex = which(colnames(patient1) == "Patient")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
566 oneSample = paste(patient1[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
567 twoSample = paste(patient2[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
568 threeSample = paste(patient3[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
569
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
570 if(mergeOn == "Clone_Sequence"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
571 patient1$merge = paste(patient1$Clone_Sequence)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
572 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
573 patient3$merge = paste(patient3$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
574
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
575 } else {
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
576 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
577 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
578 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
579 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
580
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
581 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
582 patient1.fuzzy = patient1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
583 patient2.fuzzy = patient2
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
584 patient3.fuzzy = patient3
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
585
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
586 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
587
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
588 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
589 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
590 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
591
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
592 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
593 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
594
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
595 other.sample.list = list()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
596 other.sample.list[[oneSample]] = c(twoSample, threeSample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
597 other.sample.list[[twoSample]] = c(oneSample, threeSample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
598 other.sample.list[[threeSample]] = c(oneSample, twoSample)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
599
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
600 patientMerge = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
601 patientMerge = merge(patientMerge, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
602 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
603 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
604 patientMerge = patientMerge[NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
605
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
606 duo.merge.list = list()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
607
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
608 patientMerge12 = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
609 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
610 patientMerge12 = patientMerge12[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
611 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
612 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
613
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
614 patientMerge13 = merge(patient1, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
615 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
616 patientMerge13 = patientMerge13[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
617 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
618 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
619
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
620 patientMerge23 = merge(patient2, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
621 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
622 patientMerge23 = patientMerge23[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
623 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
624 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
625
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
626 merge.list = list()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
627 merge.list[["second"]] = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
628
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
629 #print(paste(nrow(patient1), nrow(patient2), nrow(patient3), label1, label2, label3))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
630
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
631 start.time = proc.time()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
632 if(paste(label1, "123") %in% names(patient.merge.list)){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
633 patientMerge = patient.merge.list[[paste(label1, "123")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
634 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
635 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
636 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
637
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
638 #merge.list[["second"]] = patient.merge.list.second[[label1]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
639
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
640 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
641 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
642 while(nrow(patient.fuzzy) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
643 first.merge = patient.fuzzy[1,"merge"]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
644 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
645 first.sample = paste(patient.fuzzy[1,"Sample"], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
646
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
647 merge.filter = first.merge == patient.fuzzy$merge
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
648
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
649 second.sample = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
650 third.sample = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
651
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
652 sample.filter.1 = first.sample == patient.fuzzy$Sample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
653 sample.filter.2 = second.sample == patient.fuzzy$Sample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
654 sample.filter.3 = third.sample == patient.fuzzy$Sample
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
655
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
656 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
657
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
658 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
659 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
660 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
661
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
662 matches.in.1 = any(match.filter.1)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
663 matches.in.2 = any(match.filter.2)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
664 matches.in.3 = any(match.filter.3)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
665
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
666 rows.1 = patient.fuzzy[match.filter.1,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
667
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
668 sum.1 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
669 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
670 Receptor = rows.1[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
671 Sample = rows.1[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
672 Cell_Count = rows.1[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
673 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
674 Log10_Frequency = log10(sum(rows.1$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
675 Total_Read_Count = sum(rows.1$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
676 dsPerM = sum(rows.1$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
677 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
678 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
679 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
680 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
681 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
682 Frequency = sum(rows.1$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
683 locus_V = rows.1[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
684 locus_J = rows.1[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
685 uniqueID = rows.1[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
686 normalized_read_count = sum(rows.1$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
687 sum.2 = sum.1[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
688 rows.2 = patient.fuzzy[match.filter.2,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
689 if(matches.in.2){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
690 sum.2 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
691 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
692 Receptor = rows.2[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
693 Sample = rows.2[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
694 Cell_Count = rows.2[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
695 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
696 Log10_Frequency = log10(sum(rows.2$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
697 Total_Read_Count = sum(rows.2$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
698 dsPerM = sum(rows.2$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
699 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
700 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
701 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
702 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
703 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
704 Frequency = sum(rows.2$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
705 locus_V = rows.2[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
706 locus_J = rows.2[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
707 uniqueID = rows.2[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
708 normalized_read_count = sum(rows.2$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
709 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
710
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
711 sum.3 = sum.1[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
712 rows.3 = patient.fuzzy[match.filter.3,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
713 if(matches.in.3){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
714 sum.3 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
715 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
716 Receptor = rows.3[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
717 Sample = rows.3[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
718 Cell_Count = rows.3[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
719 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
720 Log10_Frequency = log10(sum(rows.3$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
721 Total_Read_Count = sum(rows.3$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
722 dsPerM = sum(rows.3$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
723 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
724 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
725 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
726 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
727 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
728 Frequency = sum(rows.3$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
729 locus_V = rows.3[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
730 locus_J = rows.3[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
731 uniqueID = rows.3[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
732 normalized_read_count = sum(rows.3$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
733 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
734
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
735 if(matches.in.2 & matches.in.3){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
736 merge.123 = merge(sum.1, sum.2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
737 merge.123 = merge(merge.123, sum.3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
738 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
739 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
740
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
741 patientMerge = rbind(patientMerge, merge.123)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
742 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
743
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
744 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
745 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
746
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
747 } else if (matches.in.2) {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
748 #other.sample1 = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
749 #other.sample2 = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
750
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
751 second.sample = sum.2[,"Sample"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
752
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
753 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
754
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
755 merge.12 = merge(sum.1, sum.2, by="merge")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
756
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
757 current.merge.list = rbind(current.merge.list, merge.12)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
758 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
759
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
760 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
761
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
762 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
763 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
764
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
765 } else if (matches.in.3) {
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
766
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
767 #other.sample1 = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
768 #other.sample2 = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
769
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
770 second.sample = sum.3[,"Sample"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
771
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
772 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
773
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
774 merge.13 = merge(sum.1, sum.3, by="merge")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
775
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
776 current.merge.list = rbind(current.merge.list, merge.13)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
777 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
778
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
779 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
780
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
781 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
782 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
783
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
784 } else if(nrow(rows.1) > 1){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
785 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
5
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
786 #print(names(patient1)[names(patient1) %in% sum.1])
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
787 #print(names(patient1)[!(names(patient1) %in% sum.1)])
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
788 #print(names(patient1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
789 #print(names(sum.1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
790 #print(summary(sum.1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
791 #print(summary(patient1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
792 #print(dim(sum.1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
793 #print(dim(patient1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
794 #print(head(sum.1[,names(patient1)]))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
795 patient1 = rbind(patient1, sum.1[,names(patient1)])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
796 patient.fuzzy = patient.fuzzy[-match.filter.1,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
797 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
798 patient.fuzzy = patient.fuzzy[-1,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
799 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
800
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
801 tmp.rows = rbind(rows.1, rows.2, rows.3)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
802 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
803
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
804 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
805 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
806 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
807 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
808
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
809 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
810 patient.merge.list[[paste(label1, "123")]] = patientMerge
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
811
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
812 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
813 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
814 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
815
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
816 patient.merge.list[[paste(label1, "12")]] = patientMerge12
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
817 patient.merge.list[[paste(label1, "13")]] = patientMerge13
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
818 patient.merge.list[[paste(label1, "23")]] = patientMerge23
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
819
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
820 #patient.merge.list.second[[label1]] = merge.list[["second"]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
821 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
822 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
823 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
824 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
825 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
826 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
827
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
828 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
829 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
830 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
831 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
832
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
833 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
834 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
835 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
836
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
837 if(F){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
838 patientMerge = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
839 patientMerge = merge(patientMerge, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
840 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
841 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
842 patientMerge12 = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
843 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
844 patientMerge13 = merge(patient1, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
845 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
846 patientMerge23 = merge(patient2, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
847 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
848 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
849
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
850 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
851 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
852 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
853
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
854 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
855
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
856 res1 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
857 res2 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
858 res3 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
859 res12 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
860 res13 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
861 res23 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
862 resAll = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
863 read1Count = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
864 read2Count = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
865 read3Count = vector()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
866
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
867 if(appendTriplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
868 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
869 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
870 for(iter in 1:length(product[,1])){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
871 threshhold = product[iter,threshholdIndex]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
872 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
873 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
874 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
875 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
876
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
877 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
878 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
879 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
880
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
881 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
882 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
883 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
884
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
885 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
886 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
887 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
888 res1 = append(res1, sum(one))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
889 res2 = append(res2, sum(two))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
890 res3 = append(res3, sum(three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
891 resAll = append(resAll, sum(all))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
892 res12 = append(res12, sum(one_two))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
893 res13 = append(res13, sum(one_three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
894 res23 = append(res23, sum(two_three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
895 #threshhold = 0
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
896 if(threshhold != 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
897 if(sum(one) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
898 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
899 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
900 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
901 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
902 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
903 if(sum(two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
904 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
905 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
906 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
907 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
908 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
909 if(sum(three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
910 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
911 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
912 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
913 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
914 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
915 if(sum(one_two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
916 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
917 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
918 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
919 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
920 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
921 if(sum(one_three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
922 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
923 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
924 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
925 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
926 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
927 if(sum(two_three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
928 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
929 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
930 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
931 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
932 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
933 } else { #scatterplot data
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
934 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
935 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
936 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
937 if(sum(in_two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
938 scatterplot_locus_data[in_two,]$type = "In two"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
939 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
940 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
941 if(sum(in_three)> 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
942 scatterplot_locus_data[in_three,]$type = "In three"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
943 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
944 not_in_one = scatterplot_locus_data$type != "In one"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
945 if(sum(not_in_one) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
946 #scatterplot_locus_data[not_in_one,]$type = "In multiple"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
947 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
948 p = NULL
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
949 if(nrow(scatterplot_locus_data) != 0){
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
950 filename.scatter = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_scatter_", threshhold, sep="")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
951 write.table(scatterplot_locus_data, file=paste(filename.scatter, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
952 if(on == "normalized_read_count"){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
953 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
954 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
955 } else {
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
956 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
957 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
958 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
959 p = p + geom_point(aes(colour=type), position="jitter")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
960 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
961 } else {
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
962 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
963 }
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
964 file_name = paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
965 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
966 png(file_name)
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
967 print(p)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
968 dev.off()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
969 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
970 if(sum(all) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
971 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
972 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
973 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
974 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
975 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
976 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
977 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
978 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
979 colnames(patientResult)[6] = oneSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
980 colnames(patientResult)[7] = twoSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
981 colnames(patientResult)[8] = threeSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
982 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
983 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
984 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
985
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
986 colnamesBak = colnames(patientResult)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
987 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
988 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
989 colnames(patientResult) = colnamesBak
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
990
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
991 patientResult$Locus = factor(patientResult$Locus, Titles)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
992 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
993
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
994 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
995 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
996 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
997 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
998 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
999 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
1000 file_name = paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep="")
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
1001 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
1002 png(file_name, width=1920, height=1080)
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1003 print(plt)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1004 dev.off()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1005
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1006 fontSize = 4
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1007
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1008 bak = patientResult
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1009 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1010 patientResult$relativeValue = patientResult$value * 10
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1011 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1012 plt = ggplot(patientResult)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1013 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1014 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1015 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1016 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1017 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1018 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1019 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
8
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
1020 file_name = paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep="")
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
1021 print(paste("Writing figure:", file_name))
eb2aa7cffca3 Uploaded
davidvanzessen
parents: 7
diff changeset
1022 png(file_name, width=1920, height=1080)
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1023 print(plt)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1024 dev.off()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1025 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1026
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1027 if(nrow(triplets) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1028
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1029 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1030
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1031 triplets$uniqueID = paste(triplets$Patient, triplets$Sample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1032
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1033 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1034
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1035 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1036 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1037 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1038
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1039 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1040 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1041
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1042 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1043
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1044 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1045
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1046 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1047
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1048 triplets = triplets[triplets$normalized_read_count >= min_cells,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1049
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1050 column_drops = c("min_cell_count", "min_cell_paste")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1051
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1052 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1053
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1054 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1055
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1056 interval = intervalReads
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1057 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1058 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1059
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1060 triplets = split(triplets, triplets$Patient, drop=T)
5
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
1061 #print(nrow(triplets))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1062 for(triplet in triplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1063 samples = unique(triplet$Sample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1064 one = triplet[triplet$Sample == samples[1],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1065 two = triplet[triplet$Sample == samples[2],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1066 three = triplet[triplet$Sample == samples[3],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1067
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1068 print(paste(nrow(triplet), nrow(one), nrow(two), nrow(three)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1069 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="normalized_read_count", T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1070 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1071
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1072 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1073
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1074 interval = intervalFreq
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1075 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1076 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1077
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1078 for(triplet in triplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1079 samples = unique(triplet$Sample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1080 one = triplet[triplet$Sample == samples[1],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1081 two = triplet[triplet$Sample == samples[2],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1082 three = triplet[triplet$Sample == samples[3],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1083 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="Frequency", F)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1084 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1085 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1086 cat("", file="triplets.txt")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1087 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1088 cat("</table></html>", file=logfile, append=T)