annotate RScript.r @ 2:7ffd0fba8cf4 draft

Uploaded
author davidvanzessen
date Mon, 18 Sep 2017 09:01:19 -0400
parents 75853bceec00
children 20f0df3721aa
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
2 options(scipen=999)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
3
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
4 inFile = args[1]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
5 outDir = args[2]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
6 logfile = args[3]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
7 min_freq = as.numeric(args[4])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
8 min_cells = as.numeric(args[5])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
9 mergeOn = args[6]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
11 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
12
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
13 library(ggplot2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
14 library(reshape2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
15 library(data.table)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
16 library(grid)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
17 library(parallel)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
18 #require(xtable)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
21
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
22 needed_cols = c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
23 if(!all(needed_cols %in% names(dat))){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
24 cat("Missing column(s):<br />", file=logfile, append=F)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
25 missing_cols = needed_cols[!(needed_cols %in% names(dat))]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
26 for(col in missing_cols){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
27 cat(paste(col, "<br />"), file=logfile, append=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
28 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
29 stop("Not all columns are present")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
30 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
31
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
32 if(!("Total_Read_Count" %in% names(dat))){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
33 dat$Total_Read_Count = 0
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
34 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
35
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
36 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
37
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
38 dat$dsPerM = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
39 dat = dat[!is.na(dat$Patient),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
40 dat$Related_to_leukemia_clone = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
41
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
42 setwd(outDir)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
43 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
44 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
45 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
46
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
47 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
48
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
49 dat$Frequency = ((10^dat$Log10_Frequency)*100)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
50
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
51 dat = dat[dat$Frequency >= min_freq,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
52
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
53 patient.sample.counts = data.frame(data.table(dat)[, list(count=.N), by=c("Patient", "Sample")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
54 patient.sample.counts = data.frame(data.table(patient.sample.counts)[, list(count=.N), by=c("Patient")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
55
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
56 print("Found the following patients with number of samples:")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
57 print(patient.sample.counts)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
58
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
59 patient.sample.counts.pairs = patient.sample.counts[patient.sample.counts$count %in% 1:2,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
60 patient.sample.counts.triplets = patient.sample.counts[patient.sample.counts$count == 3,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
61
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
62
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
63
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
64 triplets = dat[dat$Patient %in% patient.sample.counts.triplets$Patient,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
65 dat = dat[dat$Patient %in% patient.sample.counts.pairs$Patient,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
66
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
67 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
68
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
69 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
70 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
71 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
72
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
73 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
74 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
75
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
76 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
77 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
78 dat = merge(dat, min_cell_count, by="min_cell_paste")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
79 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
80 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
81
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
82 dat = dat[dat$normalized_read_count >= min_cells,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
83
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
84 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
85
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
86 patients = split(dat, dat$Patient, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
87 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
88 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
89 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
90 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
91 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
92 Titles = factor(Titles, levels=Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
93 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
94
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
95 single_patients = dat[NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
96
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
97 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
98 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
99 scatter_locus_data_list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
100 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
101 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
102 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
103 if (!is.data.frame(x) & is.list(x)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
104 x = x[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
105 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
106 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
107 x = data.frame(x,stringsAsFactors=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
108 onShort = "reads"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
109 if(on == "Frequency"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
110 onShort = "freq"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
111 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
112 onx = paste(on, ".x", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
113 ony = paste(on, ".y", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
114 splt = split(x, x$Sample, drop=T)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
115 print(splt)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
116 type="pair"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
117 if(length(splt) == 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
118 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
119 splt[[2]] = splt[[1]][NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
120 type="single"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
121 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
122 patient1 = splt[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
123 patient2 = splt[[2]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
124
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
125 oneSample = patient1[1,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
126 twoSample = patient2[1,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
127 patient = x[1,"Patient"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
128
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
129 switched = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
130 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
131 tmp = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
132 twoSample = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
133 oneSample = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
134 tmp = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
135 patient1 = patient2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
136 patient2 = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
137 switched = T
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
138 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
139 if(appendtxt){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
140 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
141 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
142 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
143
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
144 if(mergeOn == "Clone_Sequence"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
145 patient1$merge = paste(patient1$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
146 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
147 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
148 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
149 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
150 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
151
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
152 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
153 scatterplot_data = patient1[NULL,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
154 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
155 scatterplot_data$type = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
156 scatterplot_data$link = numeric(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
157 scatterplot_data$on = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
158
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
159 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
160
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
161 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
162
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
163 start.time = proc.time()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
164 merge.list = c()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
165
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
166 if(patient %in% names(patient.merge.list)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
167 patientMerge = patient.merge.list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
168 merge.list[["second"]] = patient.merge.list.second[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
169 scatterplot_data = scatter_locus_data_list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
170 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
171
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
172 print(names(patient.merge.list))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
173 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
174 #fuzzy matching here...
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
175
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
176 patient1.fuzzy = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
177 patient2.fuzzy = patient2
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
178
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
179 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
180 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
181
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
182 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
183 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
184
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
185 merge.list = list()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
186
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
187 merge.list[["second"]] = vector()
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
188
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
189 link.count = 1
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
190
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
191 while(nrow(patient.fuzzy) > 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
192 first.merge = patient.fuzzy[1,"merge"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
193 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
194 first.sample = patient.fuzzy[1,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
195 merge.filter = first.merge == patient.fuzzy$merge
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
196
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
197 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
198
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
199 first.sample.filter = first.sample == patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
200 second.sample.filter = first.sample != patient.fuzzy$Sample
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
201
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
202 #first match same sample, sum to a single row, same for other sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
203 #then merge rows like 'normal'
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
204
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
205 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
206
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
207
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
208
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
209 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
210 first.match.filter = merge.filter & sequence.filter & first.sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
211 second.match.filter = merge.filter & sequence.filter & second.sample.filter
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
212
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
213 first.rows = patient.fuzzy[first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
214 second.rows = patient.fuzzy[second.match.filter,]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
215
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
216 first.rows.v = table(first.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
217 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
218 first.rows.j = table(first.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
219 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
220
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
221 first.sum = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
222 Patient = patient,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
223 Receptor = first.rows[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
224 Sample = first.rows[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
225 Cell_Count = first.rows[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
226 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
227 Log10_Frequency = log10(sum(first.rows$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
228 Total_Read_Count = sum(first.rows$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
229 dsPerM = sum(first.rows$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
230 J_Segment_Major_Gene = first.rows.j,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
231 V_Segment_Major_Gene = first.rows.v,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
232 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
233 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
234 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
235 Frequency = sum(first.rows$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
236 locus_V = first.rows[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
237 locus_J = first.rows[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
238 min_cell_count = first.rows[1,"min_cell_count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
239 normalized_read_count = sum(first.rows$normalized_read_count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
240 paste = first.rows[1,"paste"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
241 min_cell_paste = first.rows[1,"min_cell_paste"])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
242
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
243 if(nrow(second.rows) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
244 second.rows.v = table(second.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
245 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
246 second.rows.j = table(second.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
247 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
248
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
249 second.sum = data.frame(merge = first.clone.sequence,
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
250 Patient = patient,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
251 Receptor = second.rows[1,"Receptor"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
252 Sample = second.rows[1,"Sample"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
253 Cell_Count = second.rows[1,"Cell_Count"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
254 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
255 Log10_Frequency = log10(sum(second.rows$Frequency)),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
256 Total_Read_Count = sum(second.rows$Total_Read_Count),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
257 dsPerM = sum(second.rows$dsPerM),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
258 J_Segment_Major_Gene = second.rows.j,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
259 V_Segment_Major_Gene = second.rows.v,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
260 Clone_Sequence = first.clone.sequence,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
261 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
262 Related_to_leukemia_clone = F,
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
263 Frequency = sum(second.rows$Frequency),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
264 locus_V = second.rows[1,"locus_V"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
265 locus_J = second.rows[1,"locus_J"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
266 min_cell_count = second.rows[1,"min_cell_count"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
267 normalized_read_count = sum(second.rows$normalized_read_count),
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
268 paste = second.rows[1,"paste"],
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
269 min_cell_paste = second.rows[1,"min_cell_paste"])
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
270
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
271 #print(names(patientMerge))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
272 #print(merge(first.sum, second.sum, by="merge"))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
273 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
274 #print("test2")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
275 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
276
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
277 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
278 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
279
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
280 tmp.rows = rbind(first.rows, second.rows)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
281 #print("test3")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
282 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
283
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
284
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
285 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
286 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
287 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
288 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
289 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
290
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
291 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
292
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
293 scatterplot.row = second.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
294 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
295 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
296 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
297
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
298 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
299
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
300 #write some information about the match to a log file
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
301 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
302 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
303 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
304 second.clone.sequence = second.rows[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
305 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
306 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
307 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
308 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
309 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
310 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
311
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
312 } else if(nrow(first.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
313 if(patient1[1,"Sample"] == first.sample){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
314 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
315 patient1 = rbind(patient1, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
316 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
317 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
318 patient2 = rbind(patient2, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
319 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
320
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
321 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
322 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
323
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
324 patient.fuzzy = patient.fuzzy[-first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
325
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
326 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
327 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
328 scatterplot.row$type = first.sum[,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
329 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
330 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
331
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
332 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
333
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
334 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
335 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
336 patient.fuzzy = patient.fuzzy[-1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
337
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
338 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
339 scatterplot.row = first.sum[,scatterplot_data_columns]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
340 scatterplot.row$type = first.sum[,"Sample"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
341 scatterplot.row$link = link.count
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
342 scatterplot.row$on = onShort
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
343
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
344 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
345 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
346 link.count = link.count + 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
347 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
348 patient.merge.list[[patient]] <<- patientMerge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
349 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
350
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
351 sample.order = data.frame(type = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")),type.order = 1:4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
352 scatterplot_data = merge(scatterplot_data, sample.order, by="type")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
353
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
354 scatter_locus_data_list[[patient]] <<- scatterplot_data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
355 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
356 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
357
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
358 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
359 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
360
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
361
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
362 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
363 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
364 res1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
365 res2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
366 resBoth = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
367 read1Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
368 read2Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
369 locussum1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
370 locussum2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
371
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
372 #for(iter in 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
373 for(iter in 1:length(product[,1])){
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
374 threshhold = product[iter,"interval"]
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
375 V_Segment = paste(".*", as.character(product[iter,"V_Segments"]), ".*", sep="")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
376 J_Segment = paste(".*", as.character(product[iter,"J_Segments"]), ".*", sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
377 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
378 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
379 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
380 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
381 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
382 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
383 res1 = append(res1, sum(one))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
384 res2 = append(res2, sum(two))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
385 resBoth = append(resBoth, sum(both))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
386 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
387 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
388 #threshhold = 0
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
389 if(threshhold != 0 | T){
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
390 if(sum(one) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
391 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
392 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
393 filenameOne = paste(oneSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
394 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
395 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
396 if(sum(two) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
397 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
398 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
399 filenameTwo = paste(twoSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
400 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
401 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
402 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
403 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
404 if(nrow(scatterplot_locus_data) > 0){
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
405 scatterplot_locus_data$Rearrangement = product[iter, "Titles"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
406 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
407
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
408
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
409
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
410 p = NULL
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
411 #print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data)))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
412 if(nrow(scatterplot_locus_data) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
413 if(on == "normalized_read_count"){
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
414 write.table(scatterplot_locus_data, file=paste(oneSample, twoSample, product[iter, "Titles"], "scatterplot_locus_data.txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
415 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
416 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales, limits=c(1,1e6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
417 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
418 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
419 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
420 p = p + geom_point(aes(colour=type), position="dodge")
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
421 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"]))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
422 } else {
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
423 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, titleIndex]))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
424 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
425 png(paste(patient1[1,"Patient"], "_", patient1[1,"Sample"], "_", patient2[1,"Sample"], "_", onShort, "_", product[iter, "Titles"],"_scatter.png", sep=""))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
426 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
427 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
428 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
429 if(sum(both) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
430 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
431 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
432 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, "Titles"], "_", threshhold, sep="")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
433 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
434 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
435 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
436 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
437 if(sum(is.na(patientResult$percentage)) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
438 patientResult[is.na(patientResult$percentage),]$percentage = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
439 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
440 colnames(patientResult)[6] = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
441 colnames(patientResult)[8] = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
442 colnamesBak = colnames(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
443 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
444 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
445 colnames(patientResult) = colnamesBak
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
446
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
447 patientResult$Locus = factor(patientResult$Locus, Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
448 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
449
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
450 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
451 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
452 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
453 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
454 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
455 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
456 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
457 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
458 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
459 #(t,r,b,l)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
460 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
461 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
462 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
463 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
464 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
465 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
466 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
467 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
468 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
469
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
470 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
471 patientResult$relativeValue = patientResult$value * 10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
472 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
473 plt = ggplot(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
474 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
475 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
476 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
477 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
478 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
479 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
480 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
481 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
482 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
483 }
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
484
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
485 if(length(patients) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
486 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
487
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
488 interval = intervalFreq
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
489 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
490 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
491 for (current_patient in patients){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
492 print(paste("Started working", unique(current_patient$Patient), "Frequency analysis"))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
493 patientCountOnColumn(current_patient, product=product, interval=interval, on="Frequency", appendtxt=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
494 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
495
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
496 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
497
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
498 interval = intervalReads
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
499 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
500 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
501 for (current_patient in patients){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
502 print(paste("Started working on ", unique(current_patient$Patient), "Read Count analysis"))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
503 patientCountOnColumn(current_patient, product=product, interval=interval, on="normalized_read_count")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
504 }
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
505 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
506 if(nrow(single_patients) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
507 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
508 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
509 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
510 p = p + xlab("In one or both samples") + ylab("Reads")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
511 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
512 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
513 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
514 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
515
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
516 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
517 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
518 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
519 p = p + xlab("In one or both samples") + ylab("Frequency")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
520 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
521 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
522 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
523 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
524 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
525 empty <- data.frame()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
526 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
527
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
528 png("singles_reads_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
529 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
530 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
531
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
532 png("singles_freq_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
533 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
534 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
535 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
536
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
537 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
538 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
539
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
540 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
541 onShort = "reads"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
542 if(on == "Frequency"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
543 onShort = "freq"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
544 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
545 onx = paste(on, ".x", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
546 ony = paste(on, ".y", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
547 onz = paste(on, ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
548 type="triplet"
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
549
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
550 threshholdIndex = which(colnames(product) == "interval")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
551 V_SegmentIndex = which(colnames(product) == "V_Segments")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
552 J_SegmentIndex = which(colnames(product) == "J_Segments")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
553 titleIndex = which(colnames(product) == "Titles")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
554 sampleIndex = which(colnames(patient1) == "Sample")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
555 patientIndex = which(colnames(patient1) == "Patient")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
556 oneSample = paste(patient1[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
557 twoSample = paste(patient2[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
558 threeSample = paste(patient3[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
559
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
560 if(mergeOn == "Clone_Sequence"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
561 patient1$merge = paste(patient1$Clone_Sequence)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
562 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
563 patient3$merge = paste(patient3$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
564
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
565 } else {
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
566 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
567 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
568 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
569 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
570
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
571 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
572 patient1.fuzzy = patient1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
573 patient2.fuzzy = patient2
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
574 patient3.fuzzy = patient3
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
575
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
576 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
577
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
578 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
579 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
580 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
581
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
582 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
583 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
584
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
585 other.sample.list = list()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
586 other.sample.list[[oneSample]] = c(twoSample, threeSample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
587 other.sample.list[[twoSample]] = c(oneSample, threeSample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
588 other.sample.list[[threeSample]] = c(oneSample, twoSample)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
589
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
590 patientMerge = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
591 patientMerge = merge(patientMerge, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
592 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
593 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
594 patientMerge = patientMerge[NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
595
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
596 duo.merge.list = list()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
597
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
598 patientMerge12 = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
599 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
600 patientMerge12 = patientMerge12[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
601 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
602 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
603
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
604 patientMerge13 = merge(patient1, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
605 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
606 patientMerge13 = patientMerge13[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
607 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
608 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
609
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
610 patientMerge23 = merge(patient2, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
611 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
612 patientMerge23 = patientMerge23[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
613 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
614 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
615
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
616 merge.list = list()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
617 merge.list[["second"]] = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
618
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
619 #print(paste(nrow(patient1), nrow(patient2), nrow(patient3), label1, label2, label3))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
620
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
621 start.time = proc.time()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
622 if(paste(label1, "123") %in% names(patient.merge.list)){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
623 patientMerge = patient.merge.list[[paste(label1, "123")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
624 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
625 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
626 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
627
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
628 #merge.list[["second"]] = patient.merge.list.second[[label1]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
629
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
630 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
631 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
632 while(nrow(patient.fuzzy) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
633 first.merge = patient.fuzzy[1,"merge"]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
634 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
635 first.sample = paste(patient.fuzzy[1,"Sample"], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
636
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
637 merge.filter = first.merge == patient.fuzzy$merge
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
638
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
639 second.sample = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
640 third.sample = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
641
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
642 sample.filter.1 = first.sample == patient.fuzzy$Sample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
643 sample.filter.2 = second.sample == patient.fuzzy$Sample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
644 sample.filter.3 = third.sample == patient.fuzzy$Sample
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
645
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
646 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
647
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
648 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
649 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
650 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
651
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
652 matches.in.1 = any(match.filter.1)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
653 matches.in.2 = any(match.filter.2)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
654 matches.in.3 = any(match.filter.3)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
655
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
656 rows.1 = patient.fuzzy[match.filter.1,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
657
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
658 sum.1 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
659 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
660 Receptor = rows.1[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
661 Sample = rows.1[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
662 Cell_Count = rows.1[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
663 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
664 Log10_Frequency = log10(sum(rows.1$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
665 Total_Read_Count = sum(rows.1$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
666 dsPerM = sum(rows.1$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
667 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
668 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
669 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
670 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
671 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
672 Frequency = sum(rows.1$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
673 locus_V = rows.1[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
674 locus_J = rows.1[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
675 uniqueID = rows.1[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
676 normalized_read_count = sum(rows.1$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
677 sum.2 = sum.1[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
678 rows.2 = patient.fuzzy[match.filter.2,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
679 if(matches.in.2){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
680 sum.2 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
681 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
682 Receptor = rows.2[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
683 Sample = rows.2[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
684 Cell_Count = rows.2[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
685 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
686 Log10_Frequency = log10(sum(rows.2$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
687 Total_Read_Count = sum(rows.2$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
688 dsPerM = sum(rows.2$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
689 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
690 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
691 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
692 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
693 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
694 Frequency = sum(rows.2$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
695 locus_V = rows.2[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
696 locus_J = rows.2[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
697 uniqueID = rows.2[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
698 normalized_read_count = sum(rows.2$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
699 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
700
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
701 sum.3 = sum.1[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
702 rows.3 = patient.fuzzy[match.filter.3,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
703 if(matches.in.3){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
704 sum.3 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
705 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
706 Receptor = rows.3[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
707 Sample = rows.3[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
708 Cell_Count = rows.3[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
709 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
710 Log10_Frequency = log10(sum(rows.3$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
711 Total_Read_Count = sum(rows.3$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
712 dsPerM = sum(rows.3$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
713 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
714 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
715 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
716 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
717 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
718 Frequency = sum(rows.3$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
719 locus_V = rows.3[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
720 locus_J = rows.3[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
721 uniqueID = rows.3[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
722 normalized_read_count = sum(rows.3$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
723 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
724
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
725 if(matches.in.2 & matches.in.3){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
726 merge.123 = merge(sum.1, sum.2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
727 merge.123 = merge(merge.123, sum.3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
728 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
729 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
730
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
731 patientMerge = rbind(patientMerge, merge.123)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
732 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
733
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
734 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
735 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
736
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
737 } else if (matches.in.2) {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
738 #other.sample1 = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
739 #other.sample2 = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
740
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
741 second.sample = sum.2[,"Sample"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
742
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
743 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
744
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
745 merge.12 = merge(sum.1, sum.2, by="merge")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
746
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
747 current.merge.list = rbind(current.merge.list, merge.12)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
748 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
749
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
750 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
751
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
752 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
753 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
754
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
755 } else if (matches.in.3) {
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
756
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
757 #other.sample1 = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
758 #other.sample2 = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
759
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
760 second.sample = sum.3[,"Sample"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
761
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
762 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
763
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
764 merge.13 = merge(sum.1, sum.3, by="merge")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
765
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
766 current.merge.list = rbind(current.merge.list, merge.13)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
767 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
768
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
769 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
770
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
771 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
772 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
773
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
774 } else if(nrow(rows.1) > 1){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
775 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
776 print(names(patient1)[names(patient1) %in% sum.1])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
777 print(names(patient1)[!(names(patient1) %in% sum.1)])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
778 print(names(patient1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
779 print(names(sum.1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
780 print(summary(sum.1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
781 print(summary(patient1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
782 print(dim(sum.1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
783 print(dim(patient1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
784 print(head(sum.1[,names(patient1)]))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
785 patient1 = rbind(patient1, sum.1[,names(patient1)])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
786 patient.fuzzy = patient.fuzzy[-match.filter.1,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
787 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
788 patient.fuzzy = patient.fuzzy[-1,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
789 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
790
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
791 tmp.rows = rbind(rows.1, rows.2, rows.3)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
792 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
793
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
794 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
795 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
796 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
797 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
798
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
799 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
800 patient.merge.list[[paste(label1, "123")]] = patientMerge
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
801
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
802 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
803 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
804 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
805
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
806 patient.merge.list[[paste(label1, "12")]] = patientMerge12
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
807 patient.merge.list[[paste(label1, "13")]] = patientMerge13
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
808 patient.merge.list[[paste(label1, "23")]] = patientMerge23
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
809
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
810 #patient.merge.list.second[[label1]] = merge.list[["second"]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
811 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
812 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
813 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
814 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
815 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
816 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
817
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
818 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
819 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
820 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
821 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
822
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
823 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
824 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
825 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
826
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
827 if(F){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
828 patientMerge = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
829 patientMerge = merge(patientMerge, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
830 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
831 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
832 patientMerge12 = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
833 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
834 patientMerge13 = merge(patient1, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
835 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
836 patientMerge23 = merge(patient2, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
837 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
838 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
839
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
840 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
841 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
842 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
843
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
844 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
845
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
846 res1 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
847 res2 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
848 res3 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
849 res12 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
850 res13 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
851 res23 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
852 resAll = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
853 read1Count = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
854 read2Count = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
855 read3Count = vector()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
856
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
857 if(appendTriplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
858 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
859 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
860 for(iter in 1:length(product[,1])){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
861 threshhold = product[iter,threshholdIndex]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
862 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
863 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
864 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
865 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
866
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
867 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
868 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
869 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
870
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
871 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
872 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
873 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
874
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
875 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
876 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
877 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
878 res1 = append(res1, sum(one))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
879 res2 = append(res2, sum(two))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
880 res3 = append(res3, sum(three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
881 resAll = append(resAll, sum(all))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
882 res12 = append(res12, sum(one_two))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
883 res13 = append(res13, sum(one_three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
884 res23 = append(res23, sum(two_three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
885 #threshhold = 0
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
886 if(threshhold != 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
887 if(sum(one) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
888 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
889 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
890 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
891 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
892 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
893 if(sum(two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
894 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
895 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
896 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
897 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
898 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
899 if(sum(three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
900 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
901 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
902 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
903 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
904 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
905 if(sum(one_two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
906 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
907 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
908 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
909 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
910 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
911 if(sum(one_three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
912 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
913 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
914 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
915 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
916 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
917 if(sum(two_three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
918 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
919 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
920 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
921 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
922 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
923 } else { #scatterplot data
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
924 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
925 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
926 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
927 if(sum(in_two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
928 scatterplot_locus_data[in_two,]$type = "In two"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
929 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
930 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
931 if(sum(in_three)> 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
932 scatterplot_locus_data[in_three,]$type = "In three"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
933 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
934 not_in_one = scatterplot_locus_data$type != "In one"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
935 if(sum(not_in_one) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
936 #scatterplot_locus_data[not_in_one,]$type = "In multiple"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
937 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
938 p = NULL
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
939 if(nrow(scatterplot_locus_data) != 0){
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
940 filename.scatter = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_scatter_", threshhold, sep="")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
941 write.table(scatterplot_locus_data, file=paste(filename.scatter, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
942 if(on == "normalized_read_count"){
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
943 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
944 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
945 } else {
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
946 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
947 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
948 }
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
949 p = p + geom_point(aes(colour=type), position="jitter")
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
950 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
951 } else {
2
7ffd0fba8cf4 Uploaded
davidvanzessen
parents: 1
diff changeset
952 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
953 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
954 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
955 print(p)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
956 dev.off()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
957 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
958 if(sum(all) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
959 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
960 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
961 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
962 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
963 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
964 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
965 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
966 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
967 colnames(patientResult)[6] = oneSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
968 colnames(patientResult)[7] = twoSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
969 colnames(patientResult)[8] = threeSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
970 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
971 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
972 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
973
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
974 colnamesBak = colnames(patientResult)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
975 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
976 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
977 colnames(patientResult) = colnamesBak
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
978
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
979 patientResult$Locus = factor(patientResult$Locus, Titles)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
980 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
981
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
982 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
983 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
984 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
985 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
986 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
987 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
988 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
989 print(plt)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
990 dev.off()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
991
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
992 fontSize = 4
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
993
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
994 bak = patientResult
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
995 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
996 patientResult$relativeValue = patientResult$value * 10
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
997 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
998 plt = ggplot(patientResult)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
999 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1000 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1001 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1002 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1003 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1004 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1005 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1006 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1007 print(plt)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1008 dev.off()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1009 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1010
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1011 if(nrow(triplets) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1012
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1013 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1014
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1015 triplets$uniqueID = paste(triplets$Patient, triplets$Sample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1016
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1017 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1018
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1019 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1020 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1021 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1022
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1023 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1024 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1025
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1026 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1027
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1028 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1029
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1030 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1031
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1032 triplets = triplets[triplets$normalized_read_count >= min_cells,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1033
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1034 column_drops = c("min_cell_count", "min_cell_paste")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1035
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1036 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1037
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1038 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1039
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1040 interval = intervalReads
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1041 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1042 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1043
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1044 triplets = split(triplets, triplets$Patient, drop=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1045 print(nrow(triplets))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1046 for(triplet in triplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1047 samples = unique(triplet$Sample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1048 one = triplet[triplet$Sample == samples[1],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1049 two = triplet[triplet$Sample == samples[2],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1050 three = triplet[triplet$Sample == samples[3],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1051
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1052 print(paste(nrow(triplet), nrow(one), nrow(two), nrow(three)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1053 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="normalized_read_count", T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1054 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1055
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1056 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1057
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1058 interval = intervalFreq
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1059 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1060 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1061
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1062 for(triplet in triplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1063 samples = unique(triplet$Sample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1064 one = triplet[triplet$Sample == samples[1],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1065 two = triplet[triplet$Sample == samples[2],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1066 three = triplet[triplet$Sample == samples[3],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1067 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="Frequency", F)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1068 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1069 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1070 cat("", file="triplets.txt")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1071 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1072 cat("</table></html>", file=logfile, append=T)