annotate RScript.r @ 6:a9d2ed661541 draft

Uploaded
author davidvanzessen
date Thu, 05 Oct 2017 09:06:21 -0400
parents bcf1469e8feb
children 7ce82833977c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
2 options(scipen=999)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
3
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
4 inFile = args[1]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
5 outDir = args[2]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
6 logfile = args[3]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
7 min_freq = as.numeric(args[4])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
8 min_cells = as.numeric(args[5])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
9 mergeOn = args[6]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
11 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
12
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
13 library(ggplot2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
14 library(reshape2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
15 library(data.table)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
16 library(grid)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
17 library(parallel)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
18 #require(xtable)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
21
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
22 needed_cols = c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
23 if(!all(needed_cols %in% names(dat))){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
24 cat("Missing column(s):<br />", file=logfile, append=F)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
25 missing_cols = needed_cols[!(needed_cols %in% names(dat))]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
26 for(col in missing_cols){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
27 cat(paste(col, "<br />"), file=logfile, append=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
28 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
29 stop("Not all columns are present")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
30 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
31
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
32 if(!("Total_Read_Count" %in% names(dat))){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
33 dat$Total_Read_Count = 0
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
34 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
35
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
36 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
37
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
38 dat$dsPerM = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
39 dat = dat[!is.na(dat$Patient),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
40 dat$Related_to_leukemia_clone = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
41
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
42 setwd(outDir)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
43 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
44 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
45 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
46
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
47 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
48
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
49 dat$Frequency = ((10^dat$Log10_Frequency)*100)
5
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
50 dat = dat[dat$Frequency <= 100,] #just in case?
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
51
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
52 dat = dat[dat$Frequency >= min_freq,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
53
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
54 patient.sample.counts = data.frame(data.table(dat)[, list(count=.N), by=c("Patient", "Sample")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
55 patient.sample.counts = data.frame(data.table(patient.sample.counts)[, list(count=.N), by=c("Patient")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
56
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
57 print("Found the following patients with number of samples:")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
58 print(patient.sample.counts)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
59
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
60 patient.sample.counts.pairs = patient.sample.counts[patient.sample.counts$count %in% 1:2,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
61 patient.sample.counts.triplets = patient.sample.counts[patient.sample.counts$count == 3,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
62
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
63
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
64
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
65 triplets = dat[dat$Patient %in% patient.sample.counts.triplets$Patient,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
66 dat = dat[dat$Patient %in% patient.sample.counts.pairs$Patient,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
67
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
68 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
69
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
70 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
71 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
72 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
73
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
74 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
75 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
76
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
77 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
78 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
79 dat = merge(dat, min_cell_count, by="min_cell_paste")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
80 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
81 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
82
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
83 dat = dat[dat$normalized_read_count >= min_cells,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
84
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
85 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
86
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
87 patients = split(dat, dat$Patient, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
88 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
89 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
90 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
91 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
92 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
93 Titles = factor(Titles, levels=Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
94 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
95
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
96 single_patients = dat[NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
97
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
98 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
99 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
100 scatter_locus_data_list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
101 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
102 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
103 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
104 if (!is.data.frame(x) & is.list(x)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
105 x = x[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
106 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
107 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
108 x = data.frame(x,stringsAsFactors=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
109 onShort = "reads"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
110 if(on == "Frequency"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
111 onShort = "freq"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
112 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
113 onx = paste(on, ".x", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
114 ony = paste(on, ".y", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
115 splt = split(x, x$Sample, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
116 type="pair"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
117 if(length(splt) == 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
118 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
119 splt[[2]] = splt[[1]][NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
120 type="single"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
121 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
122 patient1 = splt[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
123 patient2 = splt[[2]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
124
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
125 oneSample = patient1[1,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
126 twoSample = patient2[1,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
127 patient = x[1,"Patient"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
128
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
129 switched = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
130 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
131 tmp = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
132 twoSample = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
133 oneSample = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
134 tmp = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
135 patient1 = patient2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
136 patient2 = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
137 switched = T
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
138 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
139 if(appendtxt){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
140 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
141 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
142 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
143
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
144 if(mergeOn == "Clone_Sequence"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
145 patient1$merge = paste(patient1$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
146 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
147 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
148 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
149 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
150 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
151
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
152 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
153 scatterplot_data = patient1[NULL,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
154 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
155 scatterplot_data$type = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
156 scatterplot_data$link = numeric(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
157 scatterplot_data$on = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
158
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
159 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
160
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
161 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
162
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
163 start.time = proc.time()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
164 merge.list = c()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
165
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
166 if(patient %in% names(patient.merge.list)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
167 patientMerge = patient.merge.list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
168 merge.list[["second"]] = patient.merge.list.second[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
169 scatterplot_data = scatter_locus_data_list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
170 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
171
5
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
172 #print(names(patient.merge.list))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
173 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
174 #fuzzy matching here...
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
175
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
176 patient1.fuzzy = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
177 patient2.fuzzy = patient2
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
178
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
179 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
180 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
181
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
182 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
183 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
184
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
185 merge.list = list()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
186
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
187 merge.list[["second"]] = vector()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
188
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
189 link.count = 1
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
190
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
191 while(nrow(patient.fuzzy) > 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
192 first.merge = patient.fuzzy[1,"merge"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
193 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
194 first.sample = patient.fuzzy[1,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
195 merge.filter = first.merge == patient.fuzzy$merge
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
196
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
197 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
198
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
199 first.sample.filter = first.sample == patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
200 second.sample.filter = first.sample != patient.fuzzy$Sample
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
201
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
202 #first match same sample, sum to a single row, same for other sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
203 #then merge rows like 'normal'
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
204
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
205 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
206
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
207
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
208
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
209 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
210 first.match.filter = merge.filter & sequence.filter & first.sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
211 second.match.filter = merge.filter & sequence.filter & second.sample.filter
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
212
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
213 first.rows = patient.fuzzy[first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
214 second.rows = patient.fuzzy[second.match.filter,]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
215
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
216 first.rows.v = table(first.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
217 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
218 first.rows.j = table(first.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
219 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
220
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
221 first.sum = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
222 Patient = patient,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
223 Receptor = first.rows[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
224 Sample = first.rows[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
225 Cell_Count = first.rows[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
226 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
227 Log10_Frequency = log10(sum(first.rows$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
228 Total_Read_Count = sum(first.rows$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
229 dsPerM = sum(first.rows$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
230 J_Segment_Major_Gene = first.rows.j,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
231 V_Segment_Major_Gene = first.rows.v,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
232 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
233 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
234 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
235 Frequency = sum(first.rows$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
236 locus_V = first.rows[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
237 locus_J = first.rows[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
238 min_cell_count = first.rows[1,"min_cell_count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
239 normalized_read_count = sum(first.rows$normalized_read_count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
240 paste = first.rows[1,"paste"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
241 min_cell_paste = first.rows[1,"min_cell_paste"])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
242
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
243 if(nrow(second.rows) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
244 second.rows.v = table(second.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
245 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
246 second.rows.j = table(second.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
247 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
248
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
249 second.sum = data.frame(merge = first.clone.sequence,
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
250 Patient = patient,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
251 Receptor = second.rows[1,"Receptor"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
252 Sample = second.rows[1,"Sample"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
253 Cell_Count = second.rows[1,"Cell_Count"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
254 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
255 Log10_Frequency = log10(sum(second.rows$Frequency)),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
256 Total_Read_Count = sum(second.rows$Total_Read_Count),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
257 dsPerM = sum(second.rows$dsPerM),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
258 J_Segment_Major_Gene = second.rows.j,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
259 V_Segment_Major_Gene = second.rows.v,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
260 Clone_Sequence = first.clone.sequence,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
261 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
262 Related_to_leukemia_clone = F,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
263 Frequency = sum(second.rows$Frequency),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
264 locus_V = second.rows[1,"locus_V"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
265 locus_J = second.rows[1,"locus_J"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
266 min_cell_count = second.rows[1,"min_cell_count"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
267 normalized_read_count = sum(second.rows$normalized_read_count),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
268 paste = second.rows[1,"paste"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
269 min_cell_paste = second.rows[1,"min_cell_paste"])
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
270
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
271 #print(names(patientMerge))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
272 #print(merge(first.sum, second.sum, by="merge"))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
273 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
274 #print("test2")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
275 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
276
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
277 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
278 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
279
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
280 tmp.rows = rbind(first.rows, second.rows)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
281 #print("test3")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
282 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
283
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
284
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
285 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
286 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
287 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
288 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
289 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
290
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
291 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
292
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
293 scatterplot.row = second.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
294 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
295 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
296 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
297
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
298 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
299
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
300 #write some information about the match to a log file
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
301 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
302 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
303 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
304 second.clone.sequence = second.rows[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
305 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
306 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
307 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
308 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
309 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
310 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
311
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
312 } else if(nrow(first.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
313 if(patient1[1,"Sample"] == first.sample){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
314 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
315 patient1 = rbind(patient1, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
316 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
317 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
318 patient2 = rbind(patient2, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
319 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
320
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
321 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
322 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
323
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
324 patient.fuzzy = patient.fuzzy[-first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
325
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
326 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
327 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
328 scatterplot.row$type = first.sum[,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
329 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
330 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
331
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
332 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
333
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
334 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
335 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
336 patient.fuzzy = patient.fuzzy[-1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
337
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
338 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
339 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
340 scatterplot.row$type = first.sum[,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
341 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
342 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
343
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
344 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
345 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
346 link.count = link.count + 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
347 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
348 patient.merge.list[[patient]] <<- patientMerge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
349 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
350
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
351 sample.order = data.frame(type = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")),type.order = 1:4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
352 scatterplot_data = merge(scatterplot_data, sample.order, by="type")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
353
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
354 scatter_locus_data_list[[patient]] <<- scatterplot_data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
355 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
356 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
357
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
358 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
359 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
360
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
361
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
362 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
363 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
364 res1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
365 res2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
366 resBoth = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
367 read1Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
368 read2Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
369 locussum1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
370 locussum2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
371
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
372 #for(iter in 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
373 for(iter in 1:length(product[,1])){
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
374 threshhold = product[iter,"interval"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
375 V_Segment = paste(".*", as.character(product[iter,"V_Segments"]), ".*", sep="")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
376 J_Segment = paste(".*", as.character(product[iter,"J_Segments"]), ".*", sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
377 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
378 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
379 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
380 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
381 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
382 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
383 res1 = append(res1, sum(one))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
384 res2 = append(res2, sum(two))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
385 resBoth = append(resBoth, sum(both))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
386 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
387 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
388 #threshhold = 0
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
389 if(threshhold != 0 | T){
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
390 if(sum(one) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
391 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
392 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
393 filenameOne = paste(oneSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
394 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
395 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
396 if(sum(two) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
397 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
398 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
399 filenameTwo = paste(twoSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
400 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
401 }
6
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
402 }
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
403 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
404 if(nrow(scatterplot_locus_data) > 0){
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
405 scatterplot_locus_data$Rearrangement = product[iter, "Titles"]
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
406 }
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
407 p = NULL
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
408 #print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data)))
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
409 if(nrow(scatterplot_locus_data) != 0){
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
410 if(on == "normalized_read_count"){
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
411 write.table(scatterplot_locus_data, file=paste(oneSample, twoSample, product[iter, "Titles"], "scatterplot_locus_data.txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
412 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
413 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales, limits=c(1,1e6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
414 } else {
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
415 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
416 }
6
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
417 p = p + geom_point(aes(colour=type), position="dodge")
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
418 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"]))
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
419 } else {
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
420 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"]))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
421 }
6
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
422 png(paste(patient1[1,"Patient"], "_", patient1[1,"Sample"], "_", patient2[1,"Sample"], "_", onShort, "_", product[iter, "Titles"],"_scatter.png", sep=""))
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
423 print(p)
a9d2ed661541 Uploaded
davidvanzessen
parents: 5
diff changeset
424 dev.off()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
425 if(sum(both) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
426 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
427 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
428 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
429 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
430 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
431 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
432 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
433 if(sum(is.na(patientResult$percentage)) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
434 patientResult[is.na(patientResult$percentage),]$percentage = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
435 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
436 colnames(patientResult)[6] = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
437 colnames(patientResult)[8] = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
438 colnamesBak = colnames(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
439 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
440 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
441 colnames(patientResult) = colnamesBak
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
442
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
443 patientResult$Locus = factor(patientResult$Locus, Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
444 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
445
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
446 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
447 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
448 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
449 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
450 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
451 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
452 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
453 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
454 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
455 #(t,r,b,l)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
456 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
457 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
458 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
459 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
460 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
461 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
462 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
463 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
464 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
465
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
466 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
467 patientResult$relativeValue = patientResult$value * 10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
468 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
469 plt = ggplot(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
470 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
471 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
472 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
473 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
474 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
475 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
476 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
477 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
478 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
479 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
480
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
481 if(length(patients) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
482 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
483
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
484 interval = intervalFreq
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
485 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
486 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
487 for (current_patient in patients){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
488 print(paste("Started working", unique(current_patient$Patient), "Frequency analysis"))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
489 patientCountOnColumn(current_patient, product=product, interval=interval, on="Frequency", appendtxt=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
490 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
491
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
492 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
493
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
494 interval = intervalReads
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
495 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
496 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
497 for (current_patient in patients){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
498 print(paste("Started working on ", unique(current_patient$Patient), "Read Count analysis"))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
499 patientCountOnColumn(current_patient, product=product, interval=interval, on="normalized_read_count")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
500 }
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
501 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
502 if(nrow(single_patients) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
503 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
504 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
505 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
506 p = p + xlab("In one or both samples") + ylab("Reads")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
507 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
508 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
509 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
510 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
511
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
512 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
513 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
514 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
515 p = p + xlab("In one or both samples") + ylab("Frequency")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
516 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
517 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
518 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
519 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
520 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
521 empty <- data.frame()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
522 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
523
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
524 png("singles_reads_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
525 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
526 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
527
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
528 png("singles_freq_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
529 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
530 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
531 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
532
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
533 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
534 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
535
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
536 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
537 onShort = "reads"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
538 if(on == "Frequency"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
539 onShort = "freq"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
540 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
541 onx = paste(on, ".x", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
542 ony = paste(on, ".y", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
543 onz = paste(on, ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
544 type="triplet"
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
545
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
546 threshholdIndex = which(colnames(product) == "interval")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
547 V_SegmentIndex = which(colnames(product) == "V_Segments")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
548 J_SegmentIndex = which(colnames(product) == "J_Segments")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
549 titleIndex = which(colnames(product) == "Titles")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
550 sampleIndex = which(colnames(patient1) == "Sample")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
551 patientIndex = which(colnames(patient1) == "Patient")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
552 oneSample = paste(patient1[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
553 twoSample = paste(patient2[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
554 threeSample = paste(patient3[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
555
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
556 if(mergeOn == "Clone_Sequence"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
557 patient1$merge = paste(patient1$Clone_Sequence)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
558 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
559 patient3$merge = paste(patient3$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
560
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
561 } else {
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
562 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
563 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
564 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
565 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
566
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
567 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
568 patient1.fuzzy = patient1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
569 patient2.fuzzy = patient2
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
570 patient3.fuzzy = patient3
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
571
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
572 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
573
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
574 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
575 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
576 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
577
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
578 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
579 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
580
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
581 other.sample.list = list()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
582 other.sample.list[[oneSample]] = c(twoSample, threeSample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
583 other.sample.list[[twoSample]] = c(oneSample, threeSample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
584 other.sample.list[[threeSample]] = c(oneSample, twoSample)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
585
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
586 patientMerge = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
587 patientMerge = merge(patientMerge, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
588 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
589 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
590 patientMerge = patientMerge[NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
591
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
592 duo.merge.list = list()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
593
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
594 patientMerge12 = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
595 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
596 patientMerge12 = patientMerge12[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
597 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
598 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
599
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
600 patientMerge13 = merge(patient1, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
601 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
602 patientMerge13 = patientMerge13[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
603 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
604 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
605
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
606 patientMerge23 = merge(patient2, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
607 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
608 patientMerge23 = patientMerge23[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
609 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
610 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
611
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
612 merge.list = list()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
613 merge.list[["second"]] = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
614
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
615 #print(paste(nrow(patient1), nrow(patient2), nrow(patient3), label1, label2, label3))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
616
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
617 start.time = proc.time()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
618 if(paste(label1, "123") %in% names(patient.merge.list)){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
619 patientMerge = patient.merge.list[[paste(label1, "123")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
620 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
621 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
622 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
623
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
624 #merge.list[["second"]] = patient.merge.list.second[[label1]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
625
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
626 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
627 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
628 while(nrow(patient.fuzzy) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
629 first.merge = patient.fuzzy[1,"merge"]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
630 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
631 first.sample = paste(patient.fuzzy[1,"Sample"], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
632
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
633 merge.filter = first.merge == patient.fuzzy$merge
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
634
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
635 second.sample = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
636 third.sample = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
637
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
638 sample.filter.1 = first.sample == patient.fuzzy$Sample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
639 sample.filter.2 = second.sample == patient.fuzzy$Sample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
640 sample.filter.3 = third.sample == patient.fuzzy$Sample
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
641
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
642 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
643
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
644 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
645 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
646 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
647
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
648 matches.in.1 = any(match.filter.1)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
649 matches.in.2 = any(match.filter.2)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
650 matches.in.3 = any(match.filter.3)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
651
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
652 rows.1 = patient.fuzzy[match.filter.1,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
653
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
654 sum.1 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
655 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
656 Receptor = rows.1[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
657 Sample = rows.1[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
658 Cell_Count = rows.1[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
659 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
660 Log10_Frequency = log10(sum(rows.1$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
661 Total_Read_Count = sum(rows.1$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
662 dsPerM = sum(rows.1$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
663 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
664 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
665 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
666 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
667 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
668 Frequency = sum(rows.1$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
669 locus_V = rows.1[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
670 locus_J = rows.1[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
671 uniqueID = rows.1[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
672 normalized_read_count = sum(rows.1$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
673 sum.2 = sum.1[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
674 rows.2 = patient.fuzzy[match.filter.2,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
675 if(matches.in.2){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
676 sum.2 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
677 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
678 Receptor = rows.2[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
679 Sample = rows.2[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
680 Cell_Count = rows.2[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
681 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
682 Log10_Frequency = log10(sum(rows.2$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
683 Total_Read_Count = sum(rows.2$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
684 dsPerM = sum(rows.2$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
685 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
686 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
687 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
688 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
689 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
690 Frequency = sum(rows.2$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
691 locus_V = rows.2[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
692 locus_J = rows.2[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
693 uniqueID = rows.2[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
694 normalized_read_count = sum(rows.2$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
695 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
696
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
697 sum.3 = sum.1[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
698 rows.3 = patient.fuzzy[match.filter.3,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
699 if(matches.in.3){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
700 sum.3 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
701 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
702 Receptor = rows.3[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
703 Sample = rows.3[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
704 Cell_Count = rows.3[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
705 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
706 Log10_Frequency = log10(sum(rows.3$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
707 Total_Read_Count = sum(rows.3$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
708 dsPerM = sum(rows.3$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
709 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
710 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
711 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
712 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
713 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
714 Frequency = sum(rows.3$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
715 locus_V = rows.3[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
716 locus_J = rows.3[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
717 uniqueID = rows.3[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
718 normalized_read_count = sum(rows.3$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
719 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
720
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
721 if(matches.in.2 & matches.in.3){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
722 merge.123 = merge(sum.1, sum.2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
723 merge.123 = merge(merge.123, sum.3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
724 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
725 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
726
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
727 patientMerge = rbind(patientMerge, merge.123)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
728 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
729
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
730 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
731 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
732
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
733 } else if (matches.in.2) {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
734 #other.sample1 = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
735 #other.sample2 = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
736
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
737 second.sample = sum.2[,"Sample"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
738
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
739 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
740
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
741 merge.12 = merge(sum.1, sum.2, by="merge")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
742
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
743 current.merge.list = rbind(current.merge.list, merge.12)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
744 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
745
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
746 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
747
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
748 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
749 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
750
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
751 } else if (matches.in.3) {
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
752
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
753 #other.sample1 = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
754 #other.sample2 = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
755
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
756 second.sample = sum.3[,"Sample"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
757
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
758 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
759
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
760 merge.13 = merge(sum.1, sum.3, by="merge")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
761
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
762 current.merge.list = rbind(current.merge.list, merge.13)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
763 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
764
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
765 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
766
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
767 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
768 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
769
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
770 } else if(nrow(rows.1) > 1){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
771 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
5
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
772 #print(names(patient1)[names(patient1) %in% sum.1])
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
773 #print(names(patient1)[!(names(patient1) %in% sum.1)])
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
774 #print(names(patient1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
775 #print(names(sum.1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
776 #print(summary(sum.1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
777 #print(summary(patient1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
778 #print(dim(sum.1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
779 #print(dim(patient1))
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
780 #print(head(sum.1[,names(patient1)]))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
781 patient1 = rbind(patient1, sum.1[,names(patient1)])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
782 patient.fuzzy = patient.fuzzy[-match.filter.1,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
783 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
784 patient.fuzzy = patient.fuzzy[-1,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
785 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
786
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
787 tmp.rows = rbind(rows.1, rows.2, rows.3)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
788 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
789
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
790 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
791 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
792 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
793 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
794
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
795 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
796 patient.merge.list[[paste(label1, "123")]] = patientMerge
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
797
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
798 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
799 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
800 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
801
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
802 patient.merge.list[[paste(label1, "12")]] = patientMerge12
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
803 patient.merge.list[[paste(label1, "13")]] = patientMerge13
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
804 patient.merge.list[[paste(label1, "23")]] = patientMerge23
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
805
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
806 #patient.merge.list.second[[label1]] = merge.list[["second"]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
807 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
808 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
809 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
810 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
811 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
812 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
813
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
814 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
815 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
816 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
817 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
818
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
819 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
820 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
821 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
822
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
823 if(F){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
824 patientMerge = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
825 patientMerge = merge(patientMerge, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
826 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
827 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
828 patientMerge12 = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
829 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
830 patientMerge13 = merge(patient1, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
831 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
832 patientMerge23 = merge(patient2, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
833 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
834 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
835
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
836 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
837 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
838 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
839
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
840 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
841
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
842 res1 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
843 res2 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
844 res3 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
845 res12 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
846 res13 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
847 res23 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
848 resAll = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
849 read1Count = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
850 read2Count = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
851 read3Count = vector()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
852
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
853 if(appendTriplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
854 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
855 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
856 for(iter in 1:length(product[,1])){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
857 threshhold = product[iter,threshholdIndex]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
858 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
859 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
860 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
861 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
862
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
863 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
864 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
865 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
866
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
867 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
868 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
869 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
870
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
871 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
872 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
873 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
874 res1 = append(res1, sum(one))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
875 res2 = append(res2, sum(two))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
876 res3 = append(res3, sum(three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
877 resAll = append(resAll, sum(all))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
878 res12 = append(res12, sum(one_two))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
879 res13 = append(res13, sum(one_three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
880 res23 = append(res23, sum(two_three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
881 #threshhold = 0
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
882 if(threshhold != 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
883 if(sum(one) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
884 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
885 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
886 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
887 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
888 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
889 if(sum(two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
890 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
891 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
892 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
893 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
894 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
895 if(sum(three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
896 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
897 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
898 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
899 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
900 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
901 if(sum(one_two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
902 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
903 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
904 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
905 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
906 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
907 if(sum(one_three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
908 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
909 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
910 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
911 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
912 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
913 if(sum(two_three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
914 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
915 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
916 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
917 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
918 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
919 } else { #scatterplot data
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
920 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
921 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
922 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
923 if(sum(in_two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
924 scatterplot_locus_data[in_two,]$type = "In two"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
925 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
926 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
927 if(sum(in_three)> 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
928 scatterplot_locus_data[in_three,]$type = "In three"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
929 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
930 not_in_one = scatterplot_locus_data$type != "In one"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
931 if(sum(not_in_one) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
932 #scatterplot_locus_data[not_in_one,]$type = "In multiple"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
933 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
934 p = NULL
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
935 if(nrow(scatterplot_locus_data) != 0){
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
936 filename.scatter = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_scatter_", threshhold, sep="")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
937 write.table(scatterplot_locus_data, file=paste(filename.scatter, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
938 if(on == "normalized_read_count"){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
939 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
940 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
941 } else {
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
942 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
943 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
944 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
945 p = p + geom_point(aes(colour=type), position="jitter")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
946 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
947 } else {
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
948 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
949 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
950 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
951 print(p)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
952 dev.off()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
953 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
954 if(sum(all) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
955 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
956 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
957 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
958 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
959 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
960 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
961 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
962 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
963 colnames(patientResult)[6] = oneSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
964 colnames(patientResult)[7] = twoSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
965 colnames(patientResult)[8] = threeSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
966 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
967 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
968 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
969
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
970 colnamesBak = colnames(patientResult)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
971 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
972 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
973 colnames(patientResult) = colnamesBak
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
974
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
975 patientResult$Locus = factor(patientResult$Locus, Titles)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
976 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
977
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
978 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
979 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
980 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
981 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
982 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
983 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
984 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
985 print(plt)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
986 dev.off()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
987
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
988 fontSize = 4
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
989
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
990 bak = patientResult
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
991 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
992 patientResult$relativeValue = patientResult$value * 10
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
993 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
994 plt = ggplot(patientResult)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
995 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
996 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
997 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
998 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
999 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1000 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1001 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1002 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1003 print(plt)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1004 dev.off()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1005 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1006
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1007 if(nrow(triplets) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1008
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1009 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1010
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1011 triplets$uniqueID = paste(triplets$Patient, triplets$Sample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1012
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1013 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1014
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1015 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1016 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1017 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1018
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1019 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1020 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1021
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1022 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1023
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1024 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1025
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1026 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1027
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1028 triplets = triplets[triplets$normalized_read_count >= min_cells,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1029
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1030 column_drops = c("min_cell_count", "min_cell_paste")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1031
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1032 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1033
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1034 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1035
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1036 interval = intervalReads
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1037 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1038 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1039
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1040 triplets = split(triplets, triplets$Patient, drop=T)
5
bcf1469e8feb Uploaded
davidvanzessen
parents: 4
diff changeset
1041 #print(nrow(triplets))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1042 for(triplet in triplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1043 samples = unique(triplet$Sample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1044 one = triplet[triplet$Sample == samples[1],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1045 two = triplet[triplet$Sample == samples[2],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1046 three = triplet[triplet$Sample == samples[3],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1047
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1048 print(paste(nrow(triplet), nrow(one), nrow(two), nrow(three)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1049 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="normalized_read_count", T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1050 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1051
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1052 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1053
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1054 interval = intervalFreq
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1055 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1056 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1057
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1058 for(triplet in triplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1059 samples = unique(triplet$Sample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1060 one = triplet[triplet$Sample == samples[1],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1061 two = triplet[triplet$Sample == samples[2],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1062 three = triplet[triplet$Sample == samples[3],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1063 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="Frequency", F)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1064 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1065 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1066 cat("", file="triplets.txt")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1067 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1068 cat("</table></html>", file=logfile, append=T)