comparison RScript.r @ 1:75853bceec00 draft

Uploaded
author davidvanzessen
date Tue, 17 Jan 2017 07:24:44 -0500
parents ed6885c85660
children 7ffd0fba8cf4
comparison
equal deleted inserted replaced
0:ed6885c85660 1:75853bceec00
32 32
33 dat$Frequency = ((10^dat$Log10_Frequency)*100) 33 dat$Frequency = ((10^dat$Log10_Frequency)*100)
34 34
35 dat = dat[dat$Frequency >= min_freq,] 35 dat = dat[dat$Frequency >= min_freq,]
36 36
37 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),] 37 patient.sample.counts = data.frame(data.table(dat)[, list(count=.N), by=c("Patient", "Sample")])
38 patient.sample.counts = data.frame(data.table(patient.sample.counts)[, list(count=.N), by=c("Patient")])
39
40 print("Found the following patients with number of samples:")
41 print(patient.sample.counts)
42
43 patient.sample.counts.pairs = patient.sample.counts[patient.sample.counts$count %in% 1:2,]
44 patient.sample.counts.triplets = patient.sample.counts[patient.sample.counts$count == 3,]
45
46
47
48 triplets = dat[dat$Patient %in% patient.sample.counts.triplets$Patient,]
49 dat = dat[dat$Patient %in% patient.sample.counts.pairs$Patient,]
38 50
39 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T) 51 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
40 52
41 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4) 53 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
42 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4) 54 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
473 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep="")) 485 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
474 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080) 486 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
475 print(plt) 487 print(plt)
476 dev.off() 488 dev.off()
477 } 489 }
478 490 if(length(patients) > 0){
479 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) 491 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
480 492
481 interval = intervalFreq 493 interval = intervalFreq
482 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 494 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
483 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 495 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
484 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) 496 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
485 497
486 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) 498 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
487 499
488 interval = intervalReads 500 interval = intervalReads
489 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 501 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
490 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 502 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
491 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") 503 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
492 504 }
493 if(nrow(single_patients) > 0){ 505 if(nrow(single_patients) > 0){
494 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) 506 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
495 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000)) 507 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
496 p = p + geom_point(aes(colour=type), position="jitter") 508 p = p + geom_point(aes(colour=type), position="jitter")
497 p = p + xlab("In one or both samples") + ylab("Reads") 509 p = p + xlab("In one or both samples") + ylab("Reads")
523 535
524 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... 536 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
525 patient.merge.list.second = list() 537 patient.merge.list.second = list()
526 538
527 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ 539 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
528 onShort = "reads" 540 onShort = "reads"
529 if(on == "Frequency"){ 541 if(on == "Frequency"){
530 onShort = "freq" 542 onShort = "freq"
531 } 543 }
532 onx = paste(on, ".x", sep="") 544 onx = paste(on, ".x", sep="")
533 ony = paste(on, ".y", sep="") 545 ony = paste(on, ".y", sep="")
534 onz = paste(on, ".z", sep="") 546 onz = paste(on, ".z", sep="")
535 type="triplet" 547 type="triplet"
536 548
537 threshholdIndex = which(colnames(product) == "interval") 549 threshholdIndex = which(colnames(product) == "interval")
538 V_SegmentIndex = which(colnames(product) == "V_Segments") 550 V_SegmentIndex = which(colnames(product) == "V_Segments")
539 J_SegmentIndex = which(colnames(product) == "J_Segments") 551 J_SegmentIndex = which(colnames(product) == "J_Segments")
540 titleIndex = which(colnames(product) == "Titles") 552 titleIndex = which(colnames(product) == "Titles")
541 sampleIndex = which(colnames(patient1) == "Sample") 553 sampleIndex = which(colnames(patient1) == "Sample")
542 patientIndex = which(colnames(patient1) == "Patient") 554 patientIndex = which(colnames(patient1) == "Patient")
543 oneSample = paste(patient1[1,sampleIndex], sep="") 555 oneSample = paste(patient1[1,sampleIndex], sep="")
544 twoSample = paste(patient2[1,sampleIndex], sep="") 556 twoSample = paste(patient2[1,sampleIndex], sep="")
545 threeSample = paste(patient3[1,sampleIndex], sep="") 557 threeSample = paste(patient3[1,sampleIndex], sep="")
546 558
547 if(mergeOn == "Clone_Sequence"){ 559 if(mergeOn == "Clone_Sequence"){
548 patient1$merge = paste(patient1$Clone_Sequence) 560 patient1$merge = paste(patient1$Clone_Sequence)
549 patient2$merge = paste(patient2$Clone_Sequence) 561 patient2$merge = paste(patient2$Clone_Sequence)
550 patient3$merge = paste(patient3$Clone_Sequence) 562 patient3$merge = paste(patient3$Clone_Sequence)
551 563
552 } else { 564 } else {
553 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) 565 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
554 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) 566 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
555 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) 567 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
556 } 568 }
557 569
558 #patientMerge = merge(patient1, patient2, by="merge")[NULL,] 570 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
559 patient1.fuzzy = patient1 571 patient1.fuzzy = patient1
560 patient2.fuzzy = patient2 572 patient2.fuzzy = patient2
561 patient3.fuzzy = patient3 573 patient3.fuzzy = patient3
562 574
563 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T) 575 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
564 576
565 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) 577 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
566 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) 578 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
567 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J) 579 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
568 580
569 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy) 581 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
570 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] 582 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
571 583
572 other.sample.list = list() 584 other.sample.list = list()
573 other.sample.list[[oneSample]] = c(twoSample, threeSample) 585 other.sample.list[[oneSample]] = c(twoSample, threeSample)
574 other.sample.list[[twoSample]] = c(oneSample, threeSample) 586 other.sample.list[[twoSample]] = c(oneSample, threeSample)
575 other.sample.list[[threeSample]] = c(oneSample, twoSample) 587 other.sample.list[[threeSample]] = c(oneSample, twoSample)
576 588
577 patientMerge = merge(patient1, patient2, by="merge") 589 patientMerge = merge(patient1, patient2, by="merge")
578 patientMerge = merge(patientMerge, patient3, by="merge") 590 patientMerge = merge(patientMerge, patient3, by="merge")
579 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") 591 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
580 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) 592 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
581 patientMerge = patientMerge[NULL,] 593 patientMerge = patientMerge[NULL,]
582 594
583 duo.merge.list = list() 595 duo.merge.list = list()
584 596
585 patientMerge12 = merge(patient1, patient2, by="merge") 597 patientMerge12 = merge(patient1, patient2, by="merge")
586 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) 598 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
587 patientMerge12 = patientMerge12[NULL,] 599 patientMerge12 = patientMerge12[NULL,]
588 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12 600 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
589 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12 601 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
590 602
591 patientMerge13 = merge(patient1, patient3, by="merge") 603 patientMerge13 = merge(patient1, patient3, by="merge")
592 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) 604 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
593 patientMerge13 = patientMerge13[NULL,] 605 patientMerge13 = patientMerge13[NULL,]
594 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13 606 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
595 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13 607 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
596 608
597 patientMerge23 = merge(patient2, patient3, by="merge") 609 patientMerge23 = merge(patient2, patient3, by="merge")
598 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) 610 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
599 patientMerge23 = patientMerge23[NULL,] 611 patientMerge23 = patientMerge23[NULL,]
600 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23 612 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
601 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23 613 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
602 614
603 merge.list = list() 615 merge.list = list()
604 merge.list[["second"]] = vector() 616 merge.list[["second"]] = vector()
605 617
606 start.time = proc.time() 618 #print(paste(nrow(patient1), nrow(patient2), nrow(patient3), label1, label2, label3))
607 if(paste(label1, "123") %in% names(patient.merge.list)){ 619
608 patientMerge = patient.merge.list[[paste(label1, "123")]] 620 start.time = proc.time()
609 patientMerge12 = patient.merge.list[[paste(label1, "12")]] 621 if(paste(label1, "123") %in% names(patient.merge.list)){
610 patientMerge13 = patient.merge.list[[paste(label1, "13")]] 622 patientMerge = patient.merge.list[[paste(label1, "123")]]
611 patientMerge23 = patient.merge.list[[paste(label1, "23")]] 623 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
612 624 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
613 merge.list[["second"]] = patient.merge.list.second[[label1]] 625 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
614 626
615 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) 627 #merge.list[["second"]] = patient.merge.list.second[[label1]]
616 } else { 628
617 while(nrow(patient.fuzzy) > 0){ 629 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
618 first.merge = patient.fuzzy[1,"merge"] 630 } else {
619 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] 631 while(nrow(patient.fuzzy) > 0){
620 first.sample = patient.fuzzy[1,"Sample"] 632 first.merge = patient.fuzzy[1,"merge"]
621 633 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
622 merge.filter = first.merge == patient.fuzzy$merge 634 first.sample = paste(patient.fuzzy[1,"Sample"], sep="")
623 635
624 second.sample = other.sample.list[[first.sample]][1] 636 merge.filter = first.merge == patient.fuzzy$merge
625 third.sample = other.sample.list[[first.sample]][2] 637
626 638 second.sample = other.sample.list[[first.sample]][1]
627 sample.filter.1 = first.sample == patient.fuzzy$Sample 639 third.sample = other.sample.list[[first.sample]][2]
628 sample.filter.2 = second.sample == patient.fuzzy$Sample 640
629 sample.filter.3 = third.sample == patient.fuzzy$Sample 641 sample.filter.1 = first.sample == patient.fuzzy$Sample
630 642 sample.filter.2 = second.sample == patient.fuzzy$Sample
631 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) 643 sample.filter.3 = third.sample == patient.fuzzy$Sample
632 644
633 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter 645 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
634 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter 646
635 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter 647 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
636 648 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
637 matches.in.1 = any(match.filter.1) 649 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
638 matches.in.2 = any(match.filter.2) 650
639 matches.in.3 = any(match.filter.3) 651 matches.in.1 = any(match.filter.1)
640 652 matches.in.2 = any(match.filter.2)
641 653 matches.in.3 = any(match.filter.3)
642 654
643 rows.1 = patient.fuzzy[match.filter.1,] 655 rows.1 = patient.fuzzy[match.filter.1,]
644 656
645 sum.1 = data.frame(merge = first.clone.sequence, 657 sum.1 = data.frame(merge = first.clone.sequence,
646 Patient = label1, 658 Patient = label1,
647 Receptor = rows.1[1,"Receptor"], 659 Receptor = rows.1[1,"Receptor"],
648 Sample = rows.1[1,"Sample"], 660 Sample = rows.1[1,"Sample"],
649 Cell_Count = rows.1[1,"Cell_Count"], 661 Cell_Count = rows.1[1,"Cell_Count"],
650 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes), 662 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
651 Log10_Frequency = log10(sum(rows.1$Frequency)), 663 Log10_Frequency = log10(sum(rows.1$Frequency)),
652 Total_Read_Count = sum(rows.1$Total_Read_Count), 664 Total_Read_Count = sum(rows.1$Total_Read_Count),
653 dsPerM = sum(rows.1$dsPerM), 665 dsPerM = sum(rows.1$dsPerM),
654 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"], 666 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
655 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"], 667 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
656 Clone_Sequence = first.clone.sequence, 668 Clone_Sequence = first.clone.sequence,
657 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"], 669 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
658 Related_to_leukemia_clone = F, 670 Related_to_leukemia_clone = F,
659 Frequency = sum(rows.1$Frequency), 671 Frequency = sum(rows.1$Frequency),
660 locus_V = rows.1[1,"locus_V"], 672 locus_V = rows.1[1,"locus_V"],
661 locus_J = rows.1[1,"locus_J"], 673 locus_J = rows.1[1,"locus_J"],
662 uniqueID = rows.1[1,"uniqueID"], 674 uniqueID = rows.1[1,"uniqueID"],
663 normalized_read_count = sum(rows.1$normalized_read_count)) 675 normalized_read_count = sum(rows.1$normalized_read_count))
664 sum.2 = sum.1[NULL,] 676 sum.2 = sum.1[NULL,]
665 rows.2 = patient.fuzzy[match.filter.2,] 677 rows.2 = patient.fuzzy[match.filter.2,]
666 if(matches.in.2){ 678 if(matches.in.2){
667 sum.2 = data.frame(merge = first.clone.sequence, 679 sum.2 = data.frame(merge = first.clone.sequence,
668 Patient = label1, 680 Patient = label1,
669 Receptor = rows.2[1,"Receptor"], 681 Receptor = rows.2[1,"Receptor"],
670 Sample = rows.2[1,"Sample"], 682 Sample = rows.2[1,"Sample"],
671 Cell_Count = rows.2[1,"Cell_Count"], 683 Cell_Count = rows.2[1,"Cell_Count"],
672 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes), 684 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
673 Log10_Frequency = log10(sum(rows.2$Frequency)), 685 Log10_Frequency = log10(sum(rows.2$Frequency)),
674 Total_Read_Count = sum(rows.2$Total_Read_Count), 686 Total_Read_Count = sum(rows.2$Total_Read_Count),
675 dsPerM = sum(rows.2$dsPerM), 687 dsPerM = sum(rows.2$dsPerM),
676 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"], 688 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
677 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"], 689 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
678 Clone_Sequence = first.clone.sequence, 690 Clone_Sequence = first.clone.sequence,
679 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"], 691 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
680 Related_to_leukemia_clone = F, 692 Related_to_leukemia_clone = F,
681 Frequency = sum(rows.2$Frequency), 693 Frequency = sum(rows.2$Frequency),
682 locus_V = rows.2[1,"locus_V"], 694 locus_V = rows.2[1,"locus_V"],
683 locus_J = rows.2[1,"locus_J"], 695 locus_J = rows.2[1,"locus_J"],
684 uniqueID = rows.2[1,"uniqueID"], 696 uniqueID = rows.2[1,"uniqueID"],
685 normalized_read_count = sum(rows.2$normalized_read_count)) 697 normalized_read_count = sum(rows.2$normalized_read_count))
686 } 698 }
687 699
688 sum.3 = sum.1[NULL,] 700 sum.3 = sum.1[NULL,]
689 rows.3 = patient.fuzzy[match.filter.3,] 701 rows.3 = patient.fuzzy[match.filter.3,]
690 if(matches.in.3){ 702 if(matches.in.3){
691 sum.3 = data.frame(merge = first.clone.sequence, 703 sum.3 = data.frame(merge = first.clone.sequence,
692 Patient = label1, 704 Patient = label1,
693 Receptor = rows.3[1,"Receptor"], 705 Receptor = rows.3[1,"Receptor"],
694 Sample = rows.3[1,"Sample"], 706 Sample = rows.3[1,"Sample"],
695 Cell_Count = rows.3[1,"Cell_Count"], 707 Cell_Count = rows.3[1,"Cell_Count"],
696 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes), 708 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
697 Log10_Frequency = log10(sum(rows.3$Frequency)), 709 Log10_Frequency = log10(sum(rows.3$Frequency)),
698 Total_Read_Count = sum(rows.3$Total_Read_Count), 710 Total_Read_Count = sum(rows.3$Total_Read_Count),
699 dsPerM = sum(rows.3$dsPerM), 711 dsPerM = sum(rows.3$dsPerM),
700 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"], 712 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
701 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"], 713 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
702 Clone_Sequence = first.clone.sequence, 714 Clone_Sequence = first.clone.sequence,
703 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"], 715 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
704 Related_to_leukemia_clone = F, 716 Related_to_leukemia_clone = F,
705 Frequency = sum(rows.3$Frequency), 717 Frequency = sum(rows.3$Frequency),
706 locus_V = rows.3[1,"locus_V"], 718 locus_V = rows.3[1,"locus_V"],
707 locus_J = rows.3[1,"locus_J"], 719 locus_J = rows.3[1,"locus_J"],
708 uniqueID = rows.3[1,"uniqueID"], 720 uniqueID = rows.3[1,"uniqueID"],
709 normalized_read_count = sum(rows.3$normalized_read_count)) 721 normalized_read_count = sum(rows.3$normalized_read_count))
710 } 722 }
711 723
712 if(matches.in.2 & matches.in.3){ 724 if(matches.in.2 & matches.in.3){
713 merge.123 = merge(sum.1, sum.2, by="merge") 725 merge.123 = merge(sum.1, sum.2, by="merge")
714 merge.123 = merge(merge.123, sum.3, by="merge") 726 merge.123 = merge(merge.123, sum.3, by="merge")
715 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="") 727 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
716 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz]) 728 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
717 729
718 patientMerge = rbind(patientMerge, merge.123) 730 patientMerge = rbind(patientMerge, merge.123)
719 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),] 731 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
720 732
721 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) 733 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
722 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) 734 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
723 735
724 } else if (matches.in.2) { 736 } else if (matches.in.2) {
725 #other.sample1 = other.sample.list[[first.sample]][1] 737 #other.sample1 = other.sample.list[[first.sample]][1]
726 #other.sample2 = other.sample.list[[first.sample]][2] 738 #other.sample2 = other.sample.list[[first.sample]][2]
727 739
728 second.sample = sum.2[,"Sample"] 740 second.sample = sum.2[,"Sample"]
729 741
730 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]] 742 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
731 743
732 merge.12 = merge(sum.1, sum.2, by="merge") 744 merge.12 = merge(sum.1, sum.2, by="merge")
733 745
734 current.merge.list = rbind(current.merge.list, merge.12) 746 current.merge.list = rbind(current.merge.list, merge.12)
735 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list 747 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
736 748
737 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),] 749 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
738 750
739 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) 751 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
740 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) 752 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
741 753
742 } else if (matches.in.3) { 754 } else if (matches.in.3) {
743 755
744 #other.sample1 = other.sample.list[[first.sample]][1] 756 #other.sample1 = other.sample.list[[first.sample]][1]
745 #other.sample2 = other.sample.list[[first.sample]][2] 757 #other.sample2 = other.sample.list[[first.sample]][2]
746 758
747 second.sample = sum.3[,"Sample"] 759 second.sample = sum.3[,"Sample"]
748 760
749 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]] 761 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
750 762
751 merge.13 = merge(sum.1, sum.3, by="merge") 763 merge.13 = merge(sum.1, sum.3, by="merge")
752 764
753 current.merge.list = rbind(current.merge.list, merge.13) 765 current.merge.list = rbind(current.merge.list, merge.13)
754 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list 766 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
755 767
756 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),] 768 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
757 769
758 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) 770 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
759 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) 771 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
760 772
761 } else if(nrow(rows.1) > 1){ 773 } else if(nrow(rows.1) > 1){
762 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),] 774 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
763 print(names(patient1)[names(patient1) %in% sum.1]) 775 print(names(patient1)[names(patient1) %in% sum.1])
764 print(names(patient1)[!(names(patient1) %in% sum.1)]) 776 print(names(patient1)[!(names(patient1) %in% sum.1)])
765 print(names(patient1)) 777 print(names(patient1))
766 print(names(sum.1)) 778 print(names(sum.1))
767 print(summary(sum.1)) 779 print(summary(sum.1))
768 print(summary(patient1)) 780 print(summary(patient1))
769 print(dim(sum.1)) 781 print(dim(sum.1))
770 print(dim(patient1)) 782 print(dim(patient1))
771 print(head(sum.1[,names(patient1)])) 783 print(head(sum.1[,names(patient1)]))
772 patient1 = rbind(patient1, sum.1[,names(patient1)]) 784 patient1 = rbind(patient1, sum.1[,names(patient1)])
773 patient.fuzzy = patient.fuzzy[-match.filter.1,] 785 patient.fuzzy = patient.fuzzy[-match.filter.1,]
774 } else { 786 } else {
775 patient.fuzzy = patient.fuzzy[-1,] 787 patient.fuzzy = patient.fuzzy[-1,]
776 } 788 }
777 789
778 tmp.rows = rbind(rows.1, rows.2, rows.3) 790 tmp.rows = rbind(rows.1, rows.2, rows.3)
779 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),] 791 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
780 792
781 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) { 793 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
782 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T) 794 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
783 } else { 795 } else {
784 } 796 }
785 797
786 } 798 }
787 patient.merge.list[[paste(label1, "123")]] = patientMerge 799 patient.merge.list[[paste(label1, "123")]] = patientMerge
788 800
789 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]] 801 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
790 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]] 802 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
791 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]] 803 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
792 804
793 patient.merge.list[[paste(label1, "12")]] = patientMerge12 805 patient.merge.list[[paste(label1, "12")]] = patientMerge12
794 patient.merge.list[[paste(label1, "13")]] = patientMerge13 806 patient.merge.list[[paste(label1, "13")]] = patientMerge13
795 patient.merge.list[[paste(label1, "23")]] = patientMerge23 807 patient.merge.list[[paste(label1, "23")]] = patientMerge23
796 808
797 patient.merge.list.second[[label1]] = merge.list[["second"]] 809 #patient.merge.list.second[[label1]] = merge.list[["second"]]
798 } 810 }
799 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) 811 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
800 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) 812 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
801 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) 813 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
802 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) 814 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
803 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) 815 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
804 816
805 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) 817 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
806 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony]) 818 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
807 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony]) 819 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
808 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony]) 820 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
809 821
810 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),] 822 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
811 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),] 823 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
812 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),] 824 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
813 825
814 if(F){ 826 if(F){
815 patientMerge = merge(patient1, patient2, by="merge") 827 patientMerge = merge(patient1, patient2, by="merge")
816 patientMerge = merge(patientMerge, patient3, by="merge") 828 patientMerge = merge(patientMerge, patient3, by="merge")
817 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") 829 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
818 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) 830 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
819 patientMerge12 = merge(patient1, patient2, by="merge") 831 patientMerge12 = merge(patient1, patient2, by="merge")
820 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) 832 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
821 patientMerge13 = merge(patient1, patient3, by="merge") 833 patientMerge13 = merge(patient1, patient3, by="merge")
822 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) 834 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
823 patientMerge23 = merge(patient2, patient3, by="merge") 835 patientMerge23 = merge(patient2, patient3, by="merge")
824 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) 836 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
825 } 837 }
826 838
827 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") 839 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
828 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) 840 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
829 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] 841 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
830 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) 842
831 843 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
832 res1 = vector() 844
833 res2 = vector() 845 res1 = vector()
834 res3 = vector() 846 res2 = vector()
835 res12 = vector() 847 res3 = vector()
836 res13 = vector() 848 res12 = vector()
837 res23 = vector() 849 res13 = vector()
838 resAll = vector() 850 res23 = vector()
839 read1Count = vector() 851 resAll = vector()
840 read2Count = vector() 852 read1Count = vector()
841 read3Count = vector() 853 read2Count = vector()
842 854 read3Count = vector()
843 if(appendTriplets){ 855
844 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3) 856 if(appendTriplets){
845 } 857 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
846 for(iter in 1:length(product[,1])){ 858 }
847 threshhold = product[iter,threshholdIndex] 859 for(iter in 1:length(product[,1])){
848 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") 860 threshhold = product[iter,threshholdIndex]
849 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") 861 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
850 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) 862 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
851 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) 863 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
852 864 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
853 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge)) 865
854 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge)) 866 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
855 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge)) 867 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
856 868 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
857 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge)) 869
858 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge)) 870 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
859 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge)) 871 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
860 872 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
861 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) 873
862 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) 874 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
863 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) 875 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
864 res1 = append(res1, sum(one)) 876 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
865 res2 = append(res2, sum(two)) 877 res1 = append(res1, sum(one))
866 res3 = append(res3, sum(three)) 878 res2 = append(res2, sum(two))
867 resAll = append(resAll, sum(all)) 879 res3 = append(res3, sum(three))
868 res12 = append(res12, sum(one_two)) 880 resAll = append(resAll, sum(all))
869 res13 = append(res13, sum(one_three)) 881 res12 = append(res12, sum(one_two))
870 res23 = append(res23, sum(two_three)) 882 res13 = append(res13, sum(one_three))
871 #threshhold = 0 883 res23 = append(res23, sum(two_three))
872 if(threshhold != 0){ 884 #threshhold = 0
873 if(sum(one) > 0){ 885 if(threshhold != 0){
874 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] 886 if(sum(one) > 0){
875 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") 887 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
876 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") 888 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
877 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 889 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
878 } 890 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
879 if(sum(two) > 0){ 891 }
880 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] 892 if(sum(two) > 0){
881 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") 893 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
882 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") 894 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
883 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 895 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
884 } 896 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
885 if(sum(three) > 0){ 897 }
886 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] 898 if(sum(three) > 0){
887 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") 899 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
888 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") 900 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
889 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 901 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
890 } 902 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
891 if(sum(one_two) > 0){ 903 }
892 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] 904 if(sum(one_two) > 0){
893 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) 905 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
894 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") 906 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
895 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 907 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
896 } 908 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
897 if(sum(one_three) > 0){ 909 }
898 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] 910 if(sum(one_three) > 0){
899 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) 911 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
900 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") 912 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
901 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 913 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
902 } 914 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
903 if(sum(two_three) > 0){ 915 }
904 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] 916 if(sum(two_three) > 0){
905 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) 917 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
906 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") 918 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
907 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 919 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
908 } 920 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
909 } else { #scatterplot data 921 }
910 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] 922 } else { #scatterplot data
911 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),] 923 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
912 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge) 924 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
913 if(sum(in_two) > 0){ 925 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
914 scatterplot_locus_data[in_two,]$type = "In two" 926 if(sum(in_two) > 0){
915 } 927 scatterplot_locus_data[in_two,]$type = "In two"
916 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge) 928 }
917 if(sum(in_three)> 0){ 929 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
918 scatterplot_locus_data[in_three,]$type = "In three" 930 if(sum(in_three)> 0){
919 } 931 scatterplot_locus_data[in_three,]$type = "In three"
920 not_in_one = scatterplot_locus_data$type != "In one" 932 }
921 if(sum(not_in_one) > 0){ 933 not_in_one = scatterplot_locus_data$type != "In one"
922 #scatterplot_locus_data[not_in_one,]$type = "In multiple" 934 if(sum(not_in_one) > 0){
923 } 935 #scatterplot_locus_data[not_in_one,]$type = "In multiple"
924 p = NULL 936 }
925 if(nrow(scatterplot_locus_data) != 0){ 937 p = NULL
926 if(on == "normalized_read_count"){ 938 if(nrow(scatterplot_locus_data) != 0){
927 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) 939 if(on == "normalized_read_count"){
928 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6)) 940 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
929 } else { 941 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6))
930 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100)) 942 } else {
931 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) 943 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
932 } 944 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
933 p = p + geom_point(aes(colour=type), position="jitter") 945 }
934 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) 946 p = p + geom_point(aes(colour=type), position="jitter")
935 } else { 947 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
936 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) 948 } else {
937 } 949 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
938 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) 950 }
939 print(p) 951 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
940 dev.off() 952 print(p)
941 } 953 dev.off()
942 if(sum(all) > 0){ 954 }
943 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] 955 if(sum(all) > 0){
944 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) 956 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
945 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") 957 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
946 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 958 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
947 } 959 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
948 } 960 }
949 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) 961 }
950 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23) 962 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
951 colnames(patientResult)[6] = oneSample 963 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
952 colnames(patientResult)[7] = twoSample 964 colnames(patientResult)[6] = oneSample
953 colnames(patientResult)[8] = threeSample 965 colnames(patientResult)[7] = twoSample
954 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_") 966 colnames(patientResult)[8] = threeSample
955 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_") 967 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
956 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_") 968 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
957 969 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
958 colnamesBak = colnames(patientResult) 970
959 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample)) 971 colnamesBak = colnames(patientResult)
960 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 972 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
961 colnames(patientResult) = colnamesBak 973 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
962 974 colnames(patientResult) = colnamesBak
963 patientResult$Locus = factor(patientResult$Locus, Titles) 975
964 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep="")) 976 patientResult$Locus = factor(patientResult$Locus, Titles)
965 977 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
966 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")]) 978
967 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a") 979 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
968 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) 980 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
969 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0) 981 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
970 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All") 982 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
971 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) 983 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
972 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080) 984 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
973 print(plt) 985 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
974 dev.off() 986 print(plt)
975 987 dev.off()
976 fontSize = 4 988
977 989 fontSize = 4
978 bak = patientResult 990
979 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2) 991 bak = patientResult
980 patientResult$relativeValue = patientResult$value * 10 992 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
981 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1 993 patientResult$relativeValue = patientResult$value * 10
982 plt = ggplot(patientResult) 994 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
983 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge") 995 plt = ggplot(patientResult)
984 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) 996 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
985 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9))) 997 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
986 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize) 998 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
987 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize) 999 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
988 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize) 1000 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
989 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample") 1001 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
990 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) 1002 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
991 print(plt) 1003 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
992 dev.off() 1004 print(plt)
1005 dev.off()
993 } 1006 }
994 1007
995 if(nrow(triplets) != 0){ 1008 if(nrow(triplets) != 0){
996 1009
997 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T) 1010 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
998 1011
999 triplets$uniqueID = "ID" 1012 triplets$uniqueID = paste(triplets$Patient, triplets$Sample, sep="_")
1000 1013
1001 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 1014 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
1002 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 1015
1003 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 1016 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
1004 1017 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
1005 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 1018 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
1006 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 1019
1007 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 1020 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
1008 1021 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
1009 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" 1022
1010 1023 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
1011 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T) 1024
1012 1025 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
1013 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) 1026
1014 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) 1027 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2)
1015 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) 1028
1016 1029 triplets = triplets[triplets$normalized_read_count >= min_cells,]
1017 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) 1030
1018 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J) 1031 column_drops = c("min_cell_count", "min_cell_paste")
1019 1032
1020 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")] 1033 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
1021 1034
1022 triplets = merge(triplets, min_cell_count, by="min_cell_paste") 1035 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
1023 1036
1024 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2 1037 interval = intervalReads
1025 1038 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
1026 triplets = triplets[triplets$normalized_read_count >= min_cells,] 1039 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
1027 1040
1028 column_drops = c("min_cell_count", "min_cell_paste") 1041 triplets = split(triplets, triplets$Patient, drop=T)
1029 1042 print(nrow(triplets))
1030 triplets = triplets[,!(colnames(triplets) %in% column_drops)] 1043 for(triplet in triplets){
1031 1044 samples = unique(triplet$Sample)
1032 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) 1045 one = triplet[triplet$Sample == samples[1],]
1033 1046 two = triplet[triplet$Sample == samples[2],]
1034 interval = intervalReads 1047 three = triplet[triplet$Sample == samples[3],]
1035 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 1048
1036 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 1049 print(paste(nrow(triplet), nrow(one), nrow(two), nrow(three)))
1037 1050 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="normalized_read_count", T)
1038 one = triplets[triplets$Sample == "14696_reg_BM",] 1051 }
1039 two = triplets[triplets$Sample == "24536_reg_BM",] 1052
1040 three = triplets[triplets$Sample == "24062_reg_BM",] 1053 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
1041 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T) 1054
1042 1055 interval = intervalFreq
1043 one = triplets[triplets$Sample == "16278_Left",] 1056 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
1044 two = triplets[triplets$Sample == "26402_Left",] 1057 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
1045 three = triplets[triplets$Sample == "26759_Left",] 1058
1046 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T) 1059 for(triplet in triplets){
1047 1060 samples = unique(triplet$Sample)
1048 one = triplets[triplets$Sample == "16278_Right",] 1061 one = triplet[triplet$Sample == samples[1],]
1049 two = triplets[triplets$Sample == "26402_Right",] 1062 two = triplet[triplet$Sample == samples[2],]
1050 three = triplets[triplets$Sample == "26759_Right",] 1063 three = triplet[triplet$Sample == samples[3],]
1051 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T) 1064 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="Frequency", F)
1052 1065 }
1053 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
1054
1055 interval = intervalFreq
1056 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
1057 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
1058
1059 one = triplets[triplets$Sample == "14696_reg_BM",]
1060 two = triplets[triplets$Sample == "24536_reg_BM",]
1061 three = triplets[triplets$Sample == "24062_reg_BM",]
1062 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
1063
1064 one = triplets[triplets$Sample == "16278_Left",]
1065 two = triplets[triplets$Sample == "26402_Left",]
1066 three = triplets[triplets$Sample == "26759_Left",]
1067 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
1068
1069 one = triplets[triplets$Sample == "16278_Right",]
1070 two = triplets[triplets$Sample == "26402_Right",]
1071 three = triplets[triplets$Sample == "26759_Right",]
1072 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
1073 } else { 1066 } else {
1074 cat("", file="triplets.txt") 1067 cat("", file="triplets.txt")
1075 } 1068 }
1076 cat("</table></html>", file=logfile, append=T) 1069 cat("</table></html>", file=logfile, append=T)