annotate RScript.r @ 1:75853bceec00 draft

Uploaded
author davidvanzessen
date Tue, 17 Jan 2017 07:24:44 -0500
parents ed6885c85660
children 7ffd0fba8cf4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
2 options(scipen=999)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
3
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
4 inFile = args[1]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
5 outDir = args[2]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
6 logfile = args[3]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
7 min_freq = as.numeric(args[4])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
8 min_cells = as.numeric(args[5])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
9 mergeOn = args[6]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
11 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
12
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
13 library(ggplot2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
14 library(reshape2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
15 library(data.table)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
16 library(grid)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
17 library(parallel)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
18 #require(xtable)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
21 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
22 dat$dsPerM = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
23 dat = dat[!is.na(dat$Patient),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
24 dat$Related_to_leukemia_clone = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
25
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
26 setwd(outDir)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
27 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
28 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
29 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
30
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
31 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
32
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
33 dat$Frequency = ((10^dat$Log10_Frequency)*100)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
34
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
35 dat = dat[dat$Frequency >= min_freq,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
36
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
37 patient.sample.counts = data.frame(data.table(dat)[, list(count=.N), by=c("Patient", "Sample")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
38 patient.sample.counts = data.frame(data.table(patient.sample.counts)[, list(count=.N), by=c("Patient")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
39
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
40 print("Found the following patients with number of samples:")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
41 print(patient.sample.counts)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
42
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
43 patient.sample.counts.pairs = patient.sample.counts[patient.sample.counts$count %in% 1:2,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
44 patient.sample.counts.triplets = patient.sample.counts[patient.sample.counts$count == 3,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
45
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
46
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
47
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
48 triplets = dat[dat$Patient %in% patient.sample.counts.triplets$Patient,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
49 dat = dat[dat$Patient %in% patient.sample.counts.pairs$Patient,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
50
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
51 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
52
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
53 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
54 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
55 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
56
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
57 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
58 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
59
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
60 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
61 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
62 dat = merge(dat, min_cell_count, by="min_cell_paste")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
63 print(paste("rows:", nrow(dat)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
64 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
65
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
66 dat = dat[dat$normalized_read_count >= min_cells,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
67
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
68 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
69
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
70 patients = split(dat, dat$Patient, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
71 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
72 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
73 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
74 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
75 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
76 Titles = factor(Titles, levels=Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
77 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
78
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
79 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
80
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
81 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
82 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
83 scatter_locus_data_list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
84 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
85 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
86 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
87 if (!is.data.frame(x) & is.list(x)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
88 x = x[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
89 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
90 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
91 x = data.frame(x,stringsAsFactors=F)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
92 onShort = "reads"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
93 if(on == "Frequency"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
94 onShort = "freq"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
95 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
96 onx = paste(on, ".x", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
97 ony = paste(on, ".y", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
98 splt = split(x, x$Sample, drop=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
99 type="pair"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
100 if(length(splt) == 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
101 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
102 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
103 type="single"
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
104 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
105 patient1 = splt[[1]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
106 patient2 = splt[[2]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
107
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
108 threshholdIndex = which(colnames(product) == "interval")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
109 V_SegmentIndex = which(colnames(product) == "V_Segments")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
110 J_SegmentIndex = which(colnames(product) == "J_Segments")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
111 titleIndex = which(colnames(product) == "Titles")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
112 sampleIndex = which(colnames(x) == "Sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
113 patientIndex = which(colnames(x) == "Patient")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
114 oneSample = paste(patient1[1,sampleIndex], sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
115 twoSample = paste(patient2[1,sampleIndex], sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
116 patient = paste(x[1,patientIndex])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
117
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
118 switched = F
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
119 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
120 tmp = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
121 twoSample = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
122 oneSample = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
123 tmp = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
124 patient1 = patient2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
125 patient2 = tmp
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
126 switched = T
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
127 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
128 if(appendtxt){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
129 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
130 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
131 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
132
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
133 if(mergeOn == "Clone_Sequence"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
134 patient1$merge = paste(patient1$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
135 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
136 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
137 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
138 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
139 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
140
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
141 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
142 #scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
143 scatterplot_data = patient1[NULL,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
144 #scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
145 #scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
146 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
147 #scatterplot_data$type = factor(x=NULL, levels=scatterplot.data.type.factor)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
148 scatterplot_data$type = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
149 scatterplot_data$link = numeric(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
150 scatterplot_data$on = character(0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
151
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
152 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
153 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
154
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
155 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
156
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
157 start.time = proc.time()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
158 merge.list = c()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
159
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
160 if(patient %in% names(patient.merge.list)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
161 patientMerge = patient.merge.list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
162 merge.list[["second"]] = patient.merge.list.second[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
163 scatterplot_data = scatter_locus_data_list[[patient]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
164 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
165
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
166 print(names(patient.merge.list))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
167 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
168 #fuzzy matching here...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
169 #merge.list = patientMerge$merge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
170
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
171 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
172 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
173
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
174 patient1.fuzzy = patient1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
175 patient2.fuzzy = patient2
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
176
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
177 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
178 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
179
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
180 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
181 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
182
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
183 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
184 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
185
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
186 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
187 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
188
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
189 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
190 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
191
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
192 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
193 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
194
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
195 merge.list = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
196
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
197 merge.list[["second"]] = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
198
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
199 link.count = 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
200
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
201 while(nrow(patient.fuzzy) > 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
202 first.merge = patient.fuzzy[1,"merge"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
203 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
204 first.sample = patient.fuzzy[1,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
205 merge.filter = first.merge == patient.fuzzy$merge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
206
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
207 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
208
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
209 first.sample.filter = first.sample == patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
210 second.sample.filter = first.sample != patient.fuzzy$Sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
211
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
212 #first match same sample, sum to a single row, same for other sample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
213 #then merge rows like 'normal'
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
214
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
215 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
216
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
217
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
218
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
219 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
220 first.match.filter = merge.filter & sequence.filter & first.sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
221 second.match.filter = merge.filter & sequence.filter & second.sample.filter
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
222
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
223 first.rows = patient.fuzzy[first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
224 second.rows = patient.fuzzy[second.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
225
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
226 first.rows.v = table(first.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
227 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
228 first.rows.j = table(first.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
229 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
230
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
231 first.sum = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
232 Patient = patient,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
233 Receptor = first.rows[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
234 Sample = first.rows[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
235 Cell_Count = first.rows[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
236 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
237 Log10_Frequency = log10(sum(first.rows$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
238 Total_Read_Count = sum(first.rows$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
239 dsPerM = sum(first.rows$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
240 J_Segment_Major_Gene = first.rows.j,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
241 V_Segment_Major_Gene = first.rows.v,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
242 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
243 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
244 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
245 Frequency = sum(first.rows$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
246 locus_V = first.rows[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
247 locus_J = first.rows[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
248 min_cell_count = first.rows[1,"min_cell_count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
249 normalized_read_count = sum(first.rows$normalized_read_count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
250 paste = first.rows[1,"paste"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
251 min_cell_paste = first.rows[1,"min_cell_paste"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
252
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
253 if(nrow(second.rows) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
254 second.rows.v = table(second.rows$V_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
255 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
256 second.rows.j = table(second.rows$J_Segment_Major_Gene)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
257 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
258
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
259 second.sum = data.frame(merge = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
260 Patient = patient,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
261 Receptor = second.rows[1,"Receptor"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
262 Sample = second.rows[1,"Sample"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
263 Cell_Count = second.rows[1,"Cell_Count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
264 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
265 Log10_Frequency = log10(sum(second.rows$Frequency)),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
266 Total_Read_Count = sum(second.rows$Total_Read_Count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
267 dsPerM = sum(second.rows$dsPerM),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
268 J_Segment_Major_Gene = second.rows.j,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
269 V_Segment_Major_Gene = second.rows.v,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
270 Clone_Sequence = first.clone.sequence,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
271 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
272 Related_to_leukemia_clone = F,
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
273 Frequency = sum(second.rows$Frequency),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
274 locus_V = second.rows[1,"locus_V"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
275 locus_J = second.rows[1,"locus_J"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
276 min_cell_count = second.rows[1,"min_cell_count"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
277 normalized_read_count = sum(second.rows$normalized_read_count),
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
278 paste = second.rows[1,"paste"],
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
279 min_cell_paste = second.rows[1,"min_cell_paste"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
280
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
281 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
282 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
283
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
284 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
285 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
286
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
287 tmp.rows = rbind(first.rows, second.rows)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
288 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
289
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
290
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
291 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
292 scatterplot.row = first.sum[,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
293 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
294 scatterplot.row$link = link.count
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
295 scatterplot.row$on = onShort
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
296
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
297 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
298
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
299 scatterplot.row = second.sum[,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
300 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
301 scatterplot.row$link = link.count
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
302 scatterplot.row$on = onShort
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
303
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
304 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
305
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
306 #write some information about the match to a log file
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
307 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
308 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
309 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
310 second.clone.sequence = second.rows[1,"Clone_Sequence"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
311 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
312 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
313 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
314 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
315 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
316 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
317
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
318 } else if(nrow(first.rows) > 1) {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
319 if(patient1[1,"Sample"] == first.sample){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
320 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
321 patient1 = rbind(patient1, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
322 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
323 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
324 patient2 = rbind(patient2, first.sum)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
325 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
326
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
327 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
328 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
329
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
330 patient.fuzzy = patient.fuzzy[-first.match.filter,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
331
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
332 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
333 scatterplot.row = first.sum[,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
334 scatterplot.row$type = first.sum[,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
335 scatterplot.row$link = link.count
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
336 scatterplot.row$on = onShort
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
337
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
338 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
339
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
340 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
341 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
342 patient.fuzzy = patient.fuzzy[-1,]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
343
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
344 #add to the scatterplot data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
345 scatterplot.row = first.sum[,scatterplot_data_columns]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
346 scatterplot.row$type = first.sum[,"Sample"]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
347 scatterplot.row$link = link.count
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
348 scatterplot.row$on = onShort
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
349
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
350 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
351 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
352 link.count = link.count + 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
353 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
354 patient.merge.list[[patient]] <<- patientMerge
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
355 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
356
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
357 sample.order = data.frame(type = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")),type.order = 1:4)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
358 scatterplot_data = merge(scatterplot_data, sample.order, by="type")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
359
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
360 scatter_locus_data_list[[patient]] <<- scatterplot_data
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
361 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
362 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
363
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
364 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
365 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
366
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
367
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
368 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
369 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
370 res1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
371 res2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
372 resBoth = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
373 read1Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
374 read2Count = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
375 locussum1 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
376 locussum2 = vector()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
377
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
378 #for(iter in 1){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
379 for(iter in 1:length(product[,1])){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
380 threshhold = product[iter,threshholdIndex]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
381 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
382 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
383 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
384 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
385 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
386 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
387 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
388 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
389 res1 = append(res1, sum(one))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
390 res2 = append(res2, sum(two))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
391 resBoth = append(resBoth, sum(both))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
392 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
393 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
394 #threshhold = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
395 if(threshhold != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
396 if(sum(one) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
397 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
398 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
399 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
400 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
401 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
402 if(sum(two) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
403 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
404 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
405 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
406 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
407 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
408 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
409 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
410 if(nrow(scatterplot_locus_data) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
411 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
412 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
413
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
414
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
415
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
416 p = NULL
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
417 print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
418 if(nrow(scatterplot_locus_data) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
419 if(on == "normalized_read_count"){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
420 write.table(scatterplot_locus_data, file=paste(oneSample, twoSample, product[iter, titleIndex], "scatterplot_locus_data.txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
421 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
422 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales, limits=c(1,1e6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
423 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
424 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
425 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
426 p = p + geom_point(aes(colour=type), position="dodge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
427 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
428 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
429 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
430 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
431 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
432 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
433 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
434 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
435 if(sum(both) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
436 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
437 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
438 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
439 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
440 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
441 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
442 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
443 if(sum(is.na(patientResult$percentage)) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
444 patientResult[is.na(patientResult$percentage),]$percentage = 0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
445 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
446 colnames(patientResult)[6] = oneSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
447 colnames(patientResult)[8] = twoSample
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
448 colnamesBak = colnames(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
449 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
450 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
451 colnames(patientResult) = colnamesBak
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
452
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
453 patientResult$Locus = factor(patientResult$Locus, Titles)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
454 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
455
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
456 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
457 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
458 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
459 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
460 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
461 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
462 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
463 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
464 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
465 #(t,r,b,l)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
466 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
467 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
468 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
469 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
470 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
471 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
472 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
473 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
474 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
475
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
476 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
477 patientResult$relativeValue = patientResult$value * 10
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
478 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
479 plt = ggplot(patientResult)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
480 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
481 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
482 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
483 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
484 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
485 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
486 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
487 print(plt)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
488 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
489 }
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
490 if(length(patients) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
491 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
492
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
493 interval = intervalFreq
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
494 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
495 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
496 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
497
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
498 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
499
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
500 interval = intervalReads
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
501 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
502 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
503 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
504 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
505 if(nrow(single_patients) > 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
506 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
507 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
508 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
509 p = p + xlab("In one or both samples") + ylab("Reads")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
510 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
511 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
512 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
513 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
514
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
515 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
516 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
517 p = p + geom_point(aes(colour=type), position="jitter")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
518 p = p + xlab("In one or both samples") + ylab("Frequency")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
519 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
520 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
521 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
522 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
523 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
524 empty <- data.frame()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
525 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
526
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
527 png("singles_reads_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
528 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
529 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
530
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
531 png("singles_freq_scatterplot.png", width=400, height=300)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
532 print(p)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
533 dev.off()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
534 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
535
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
536 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
537 patient.merge.list.second = list()
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
538
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
539 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
540 onShort = "reads"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
541 if(on == "Frequency"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
542 onShort = "freq"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
543 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
544 onx = paste(on, ".x", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
545 ony = paste(on, ".y", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
546 onz = paste(on, ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
547 type="triplet"
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
548
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
549 threshholdIndex = which(colnames(product) == "interval")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
550 V_SegmentIndex = which(colnames(product) == "V_Segments")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
551 J_SegmentIndex = which(colnames(product) == "J_Segments")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
552 titleIndex = which(colnames(product) == "Titles")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
553 sampleIndex = which(colnames(patient1) == "Sample")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
554 patientIndex = which(colnames(patient1) == "Patient")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
555 oneSample = paste(patient1[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
556 twoSample = paste(patient2[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
557 threeSample = paste(patient3[1,sampleIndex], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
558
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
559 if(mergeOn == "Clone_Sequence"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
560 patient1$merge = paste(patient1$Clone_Sequence)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
561 patient2$merge = paste(patient2$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
562 patient3$merge = paste(patient3$Clone_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
563
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
564 } else {
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
565 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
566 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
567 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
568 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
569
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
570 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
571 patient1.fuzzy = patient1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
572 patient2.fuzzy = patient2
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
573 patient3.fuzzy = patient3
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
574
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
575 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
576
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
577 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
578 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
579 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
580
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
581 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
582 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
583
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
584 other.sample.list = list()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
585 other.sample.list[[oneSample]] = c(twoSample, threeSample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
586 other.sample.list[[twoSample]] = c(oneSample, threeSample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
587 other.sample.list[[threeSample]] = c(oneSample, twoSample)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
588
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
589 patientMerge = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
590 patientMerge = merge(patientMerge, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
591 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
592 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
593 patientMerge = patientMerge[NULL,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
594
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
595 duo.merge.list = list()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
596
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
597 patientMerge12 = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
598 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
599 patientMerge12 = patientMerge12[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
600 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
601 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
602
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
603 patientMerge13 = merge(patient1, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
604 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
605 patientMerge13 = patientMerge13[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
606 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
607 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
608
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
609 patientMerge23 = merge(patient2, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
610 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
611 patientMerge23 = patientMerge23[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
612 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
613 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
614
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
615 merge.list = list()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
616 merge.list[["second"]] = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
617
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
618 #print(paste(nrow(patient1), nrow(patient2), nrow(patient3), label1, label2, label3))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
619
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
620 start.time = proc.time()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
621 if(paste(label1, "123") %in% names(patient.merge.list)){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
622 patientMerge = patient.merge.list[[paste(label1, "123")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
623 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
624 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
625 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
626
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
627 #merge.list[["second"]] = patient.merge.list.second[[label1]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
628
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
629 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
630 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
631 while(nrow(patient.fuzzy) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
632 first.merge = patient.fuzzy[1,"merge"]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
633 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
634 first.sample = paste(patient.fuzzy[1,"Sample"], sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
635
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
636 merge.filter = first.merge == patient.fuzzy$merge
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
637
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
638 second.sample = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
639 third.sample = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
640
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
641 sample.filter.1 = first.sample == patient.fuzzy$Sample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
642 sample.filter.2 = second.sample == patient.fuzzy$Sample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
643 sample.filter.3 = third.sample == patient.fuzzy$Sample
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
644
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
645 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
646
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
647 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
648 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
649 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
650
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
651 matches.in.1 = any(match.filter.1)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
652 matches.in.2 = any(match.filter.2)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
653 matches.in.3 = any(match.filter.3)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
654
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
655 rows.1 = patient.fuzzy[match.filter.1,]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
656
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
657 sum.1 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
658 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
659 Receptor = rows.1[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
660 Sample = rows.1[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
661 Cell_Count = rows.1[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
662 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
663 Log10_Frequency = log10(sum(rows.1$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
664 Total_Read_Count = sum(rows.1$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
665 dsPerM = sum(rows.1$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
666 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
667 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
668 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
669 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
670 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
671 Frequency = sum(rows.1$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
672 locus_V = rows.1[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
673 locus_J = rows.1[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
674 uniqueID = rows.1[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
675 normalized_read_count = sum(rows.1$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
676 sum.2 = sum.1[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
677 rows.2 = patient.fuzzy[match.filter.2,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
678 if(matches.in.2){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
679 sum.2 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
680 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
681 Receptor = rows.2[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
682 Sample = rows.2[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
683 Cell_Count = rows.2[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
684 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
685 Log10_Frequency = log10(sum(rows.2$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
686 Total_Read_Count = sum(rows.2$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
687 dsPerM = sum(rows.2$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
688 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
689 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
690 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
691 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
692 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
693 Frequency = sum(rows.2$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
694 locus_V = rows.2[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
695 locus_J = rows.2[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
696 uniqueID = rows.2[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
697 normalized_read_count = sum(rows.2$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
698 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
699
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
700 sum.3 = sum.1[NULL,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
701 rows.3 = patient.fuzzy[match.filter.3,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
702 if(matches.in.3){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
703 sum.3 = data.frame(merge = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
704 Patient = label1,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
705 Receptor = rows.3[1,"Receptor"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
706 Sample = rows.3[1,"Sample"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
707 Cell_Count = rows.3[1,"Cell_Count"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
708 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
709 Log10_Frequency = log10(sum(rows.3$Frequency)),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
710 Total_Read_Count = sum(rows.3$Total_Read_Count),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
711 dsPerM = sum(rows.3$dsPerM),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
712 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
713 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
714 Clone_Sequence = first.clone.sequence,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
715 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
716 Related_to_leukemia_clone = F,
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
717 Frequency = sum(rows.3$Frequency),
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
718 locus_V = rows.3[1,"locus_V"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
719 locus_J = rows.3[1,"locus_J"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
720 uniqueID = rows.3[1,"uniqueID"],
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
721 normalized_read_count = sum(rows.3$normalized_read_count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
722 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
723
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
724 if(matches.in.2 & matches.in.3){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
725 merge.123 = merge(sum.1, sum.2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
726 merge.123 = merge(merge.123, sum.3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
727 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
728 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
729
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
730 patientMerge = rbind(patientMerge, merge.123)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
731 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
732
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
733 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
734 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
735
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
736 } else if (matches.in.2) {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
737 #other.sample1 = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
738 #other.sample2 = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
739
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
740 second.sample = sum.2[,"Sample"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
741
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
742 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
743
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
744 merge.12 = merge(sum.1, sum.2, by="merge")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
745
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
746 current.merge.list = rbind(current.merge.list, merge.12)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
747 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
748
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
749 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
750
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
751 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
752 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
753
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
754 } else if (matches.in.3) {
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
755
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
756 #other.sample1 = other.sample.list[[first.sample]][1]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
757 #other.sample2 = other.sample.list[[first.sample]][2]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
758
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
759 second.sample = sum.3[,"Sample"]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
760
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
761 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
762
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
763 merge.13 = merge(sum.1, sum.3, by="merge")
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
764
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
765 current.merge.list = rbind(current.merge.list, merge.13)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
766 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
767
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
768 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
769
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
770 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
771 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
772
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
773 } else if(nrow(rows.1) > 1){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
774 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
775 print(names(patient1)[names(patient1) %in% sum.1])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
776 print(names(patient1)[!(names(patient1) %in% sum.1)])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
777 print(names(patient1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
778 print(names(sum.1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
779 print(summary(sum.1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
780 print(summary(patient1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
781 print(dim(sum.1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
782 print(dim(patient1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
783 print(head(sum.1[,names(patient1)]))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
784 patient1 = rbind(patient1, sum.1[,names(patient1)])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
785 patient.fuzzy = patient.fuzzy[-match.filter.1,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
786 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
787 patient.fuzzy = patient.fuzzy[-1,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
788 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
789
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
790 tmp.rows = rbind(rows.1, rows.2, rows.3)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
791 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
792
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
793 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
794 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
795 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
796 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
797
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
798 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
799 patient.merge.list[[paste(label1, "123")]] = patientMerge
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
800
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
801 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
802 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
803 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
804
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
805 patient.merge.list[[paste(label1, "12")]] = patientMerge12
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
806 patient.merge.list[[paste(label1, "13")]] = patientMerge13
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
807 patient.merge.list[[paste(label1, "23")]] = patientMerge23
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
808
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
809 #patient.merge.list.second[[label1]] = merge.list[["second"]]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
810 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
811 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
812 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
813 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
814 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
815 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
816
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
817 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
818 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
819 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
820 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
821
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
822 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
823 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
824 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
825
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
826 if(F){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
827 patientMerge = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
828 patientMerge = merge(patientMerge, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
829 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
830 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
831 patientMerge12 = merge(patient1, patient2, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
832 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
833 patientMerge13 = merge(patient1, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
834 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
835 patientMerge23 = merge(patient2, patient3, by="merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
836 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
837 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
838
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
839 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
840 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
841 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
842
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
843 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
844
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
845 res1 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
846 res2 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
847 res3 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
848 res12 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
849 res13 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
850 res23 = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
851 resAll = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
852 read1Count = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
853 read2Count = vector()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
854 read3Count = vector()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
855
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
856 if(appendTriplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
857 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
858 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
859 for(iter in 1:length(product[,1])){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
860 threshhold = product[iter,threshholdIndex]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
861 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
862 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
863 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
864 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
865
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
866 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
867 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
868 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
869
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
870 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
871 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
872 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
873
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
874 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
875 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
876 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
877 res1 = append(res1, sum(one))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
878 res2 = append(res2, sum(two))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
879 res3 = append(res3, sum(three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
880 resAll = append(resAll, sum(all))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
881 res12 = append(res12, sum(one_two))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
882 res13 = append(res13, sum(one_three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
883 res23 = append(res23, sum(two_three))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
884 #threshhold = 0
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
885 if(threshhold != 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
886 if(sum(one) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
887 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
888 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
889 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
890 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
891 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
892 if(sum(two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
893 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
894 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
895 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
896 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
897 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
898 if(sum(three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
899 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
900 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
901 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
902 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
903 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
904 if(sum(one_two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
905 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
906 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
907 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
908 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
909 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
910 if(sum(one_three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
911 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
912 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
913 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
914 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
915 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
916 if(sum(two_three) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
917 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
918 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
919 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
920 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
921 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
922 } else { #scatterplot data
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
923 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
924 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
925 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
926 if(sum(in_two) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
927 scatterplot_locus_data[in_two,]$type = "In two"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
928 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
929 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
930 if(sum(in_three)> 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
931 scatterplot_locus_data[in_three,]$type = "In three"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
932 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
933 not_in_one = scatterplot_locus_data$type != "In one"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
934 if(sum(not_in_one) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
935 #scatterplot_locus_data[not_in_one,]$type = "In multiple"
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
936 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
937 p = NULL
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
938 if(nrow(scatterplot_locus_data) != 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
939 if(on == "normalized_read_count"){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
940 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
941 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
942 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
943 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
944 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
945 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
946 p = p + geom_point(aes(colour=type), position="jitter")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
947 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
948 } else {
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
949 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
950 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
951 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
952 print(p)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
953 dev.off()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
954 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
955 if(sum(all) > 0){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
956 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
957 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
958 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
959 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
960 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
961 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
962 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
963 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
964 colnames(patientResult)[6] = oneSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
965 colnames(patientResult)[7] = twoSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
966 colnames(patientResult)[8] = threeSample
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
967 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
968 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
969 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
970
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
971 colnamesBak = colnames(patientResult)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
972 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
973 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
974 colnames(patientResult) = colnamesBak
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
975
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
976 patientResult$Locus = factor(patientResult$Locus, Titles)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
977 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
978
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
979 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
980 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
981 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
982 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
983 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
984 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
985 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
986 print(plt)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
987 dev.off()
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
988
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
989 fontSize = 4
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
990
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
991 bak = patientResult
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
992 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
993 patientResult$relativeValue = patientResult$value * 10
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
994 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
995 plt = ggplot(patientResult)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
996 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
997 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
998 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
999 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1000 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1001 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1002 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1003 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1004 print(plt)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1005 dev.off()
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1006 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1007
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1008 if(nrow(triplets) != 0){
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1009
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1010 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1011
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1012 triplets$uniqueID = paste(triplets$Patient, triplets$Sample, sep="_")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1013
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1014 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1015
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1016 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1017 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1018 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1019
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1020 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1021 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1022
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1023 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1024
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1025 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1026
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1027 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1028
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1029 triplets = triplets[triplets$normalized_read_count >= min_cells,]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1030
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1031 column_drops = c("min_cell_count", "min_cell_paste")
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1032
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1033 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1034
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1035 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1036
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1037 interval = intervalReads
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1038 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1039 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1040
1
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1041 triplets = split(triplets, triplets$Patient, drop=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1042 print(nrow(triplets))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1043 for(triplet in triplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1044 samples = unique(triplet$Sample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1045 one = triplet[triplet$Sample == samples[1],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1046 two = triplet[triplet$Sample == samples[2],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1047 three = triplet[triplet$Sample == samples[3],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1048
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1049 print(paste(nrow(triplet), nrow(one), nrow(two), nrow(three)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1050 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="normalized_read_count", T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1051 }
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1052
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1053 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1054
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1055 interval = intervalFreq
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1056 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1057 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1058
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1059 for(triplet in triplets){
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1060 samples = unique(triplet$Sample)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1061 one = triplet[triplet$Sample == samples[1],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1062 two = triplet[triplet$Sample == samples[2],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1063 three = triplet[triplet$Sample == samples[3],]
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1064 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="Frequency", F)
75853bceec00 Uploaded
davidvanzessen
parents: 0
diff changeset
1065 }
0
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1066 } else {
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1067 cat("", file="triplets.txt")
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1068 }
ed6885c85660 Uploaded
davidvanzessen
parents:
diff changeset
1069 cat("</table></html>", file=logfile, append=T)