Mercurial > repos > davidvanzessen > prisca
comparison RScript.r @ 1:75853bceec00 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 17 Jan 2017 07:24:44 -0500 |
parents | ed6885c85660 |
children | 7ffd0fba8cf4 |
comparison
equal
deleted
inserted
replaced
0:ed6885c85660 | 1:75853bceec00 |
---|---|
32 | 32 |
33 dat$Frequency = ((10^dat$Log10_Frequency)*100) | 33 dat$Frequency = ((10^dat$Log10_Frequency)*100) |
34 | 34 |
35 dat = dat[dat$Frequency >= min_freq,] | 35 dat = dat[dat$Frequency >= min_freq,] |
36 | 36 |
37 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),] | 37 patient.sample.counts = data.frame(data.table(dat)[, list(count=.N), by=c("Patient", "Sample")]) |
38 patient.sample.counts = data.frame(data.table(patient.sample.counts)[, list(count=.N), by=c("Patient")]) | |
39 | |
40 print("Found the following patients with number of samples:") | |
41 print(patient.sample.counts) | |
42 | |
43 patient.sample.counts.pairs = patient.sample.counts[patient.sample.counts$count %in% 1:2,] | |
44 patient.sample.counts.triplets = patient.sample.counts[patient.sample.counts$count == 3,] | |
45 | |
46 | |
47 | |
48 triplets = dat[dat$Patient %in% patient.sample.counts.triplets$Patient,] | |
49 dat = dat[dat$Patient %in% patient.sample.counts.pairs$Patient,] | |
38 | 50 |
39 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T) | 51 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T) |
40 | 52 |
41 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4) | 53 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4) |
42 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4) | 54 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4) |
473 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep="")) | 485 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep="")) |
474 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080) | 486 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080) |
475 print(plt) | 487 print(plt) |
476 dev.off() | 488 dev.off() |
477 } | 489 } |
478 | 490 if(length(patients) > 0){ |
479 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) | 491 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) |
480 | 492 |
481 interval = intervalFreq | 493 interval = intervalFreq |
482 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 494 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
483 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 495 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
484 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) | 496 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) |
485 | 497 |
486 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) | 498 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) |
487 | 499 |
488 interval = intervalReads | 500 interval = intervalReads |
489 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 501 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
490 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 502 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
491 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") | 503 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") |
492 | 504 } |
493 if(nrow(single_patients) > 0){ | 505 if(nrow(single_patients) > 0){ |
494 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 506 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
495 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000)) | 507 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000)) |
496 p = p + geom_point(aes(colour=type), position="jitter") | 508 p = p + geom_point(aes(colour=type), position="jitter") |
497 p = p + xlab("In one or both samples") + ylab("Reads") | 509 p = p + xlab("In one or both samples") + ylab("Reads") |
523 | 535 |
524 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... | 536 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... |
525 patient.merge.list.second = list() | 537 patient.merge.list.second = list() |
526 | 538 |
527 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ | 539 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ |
528 onShort = "reads" | 540 onShort = "reads" |
529 if(on == "Frequency"){ | 541 if(on == "Frequency"){ |
530 onShort = "freq" | 542 onShort = "freq" |
531 } | 543 } |
532 onx = paste(on, ".x", sep="") | 544 onx = paste(on, ".x", sep="") |
533 ony = paste(on, ".y", sep="") | 545 ony = paste(on, ".y", sep="") |
534 onz = paste(on, ".z", sep="") | 546 onz = paste(on, ".z", sep="") |
535 type="triplet" | 547 type="triplet" |
536 | 548 |
537 threshholdIndex = which(colnames(product) == "interval") | 549 threshholdIndex = which(colnames(product) == "interval") |
538 V_SegmentIndex = which(colnames(product) == "V_Segments") | 550 V_SegmentIndex = which(colnames(product) == "V_Segments") |
539 J_SegmentIndex = which(colnames(product) == "J_Segments") | 551 J_SegmentIndex = which(colnames(product) == "J_Segments") |
540 titleIndex = which(colnames(product) == "Titles") | 552 titleIndex = which(colnames(product) == "Titles") |
541 sampleIndex = which(colnames(patient1) == "Sample") | 553 sampleIndex = which(colnames(patient1) == "Sample") |
542 patientIndex = which(colnames(patient1) == "Patient") | 554 patientIndex = which(colnames(patient1) == "Patient") |
543 oneSample = paste(patient1[1,sampleIndex], sep="") | 555 oneSample = paste(patient1[1,sampleIndex], sep="") |
544 twoSample = paste(patient2[1,sampleIndex], sep="") | 556 twoSample = paste(patient2[1,sampleIndex], sep="") |
545 threeSample = paste(patient3[1,sampleIndex], sep="") | 557 threeSample = paste(patient3[1,sampleIndex], sep="") |
546 | 558 |
547 if(mergeOn == "Clone_Sequence"){ | 559 if(mergeOn == "Clone_Sequence"){ |
548 patient1$merge = paste(patient1$Clone_Sequence) | 560 patient1$merge = paste(patient1$Clone_Sequence) |
549 patient2$merge = paste(patient2$Clone_Sequence) | 561 patient2$merge = paste(patient2$Clone_Sequence) |
550 patient3$merge = paste(patient3$Clone_Sequence) | 562 patient3$merge = paste(patient3$Clone_Sequence) |
551 | 563 |
552 } else { | 564 } else { |
553 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 565 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) |
554 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 566 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) |
555 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) | 567 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) |
556 } | 568 } |
557 | 569 |
558 #patientMerge = merge(patient1, patient2, by="merge")[NULL,] | 570 #patientMerge = merge(patient1, patient2, by="merge")[NULL,] |
559 patient1.fuzzy = patient1 | 571 patient1.fuzzy = patient1 |
560 patient2.fuzzy = patient2 | 572 patient2.fuzzy = patient2 |
561 patient3.fuzzy = patient3 | 573 patient3.fuzzy = patient3 |
562 | 574 |
563 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T) | 575 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T) |
564 | 576 |
565 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) | 577 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) |
566 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) | 578 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) |
567 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J) | 579 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J) |
568 | 580 |
569 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy) | 581 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy) |
570 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] | 582 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] |
571 | 583 |
572 other.sample.list = list() | 584 other.sample.list = list() |
573 other.sample.list[[oneSample]] = c(twoSample, threeSample) | 585 other.sample.list[[oneSample]] = c(twoSample, threeSample) |
574 other.sample.list[[twoSample]] = c(oneSample, threeSample) | 586 other.sample.list[[twoSample]] = c(oneSample, threeSample) |
575 other.sample.list[[threeSample]] = c(oneSample, twoSample) | 587 other.sample.list[[threeSample]] = c(oneSample, twoSample) |
576 | 588 |
577 patientMerge = merge(patient1, patient2, by="merge") | 589 patientMerge = merge(patient1, patient2, by="merge") |
578 patientMerge = merge(patientMerge, patient3, by="merge") | 590 patientMerge = merge(patientMerge, patient3, by="merge") |
579 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") | 591 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") |
580 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | 592 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) |
581 patientMerge = patientMerge[NULL,] | 593 patientMerge = patientMerge[NULL,] |
582 | 594 |
583 duo.merge.list = list() | 595 duo.merge.list = list() |
584 | 596 |
585 patientMerge12 = merge(patient1, patient2, by="merge") | 597 patientMerge12 = merge(patient1, patient2, by="merge") |
586 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) | 598 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) |
587 patientMerge12 = patientMerge12[NULL,] | 599 patientMerge12 = patientMerge12[NULL,] |
588 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12 | 600 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12 |
589 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12 | 601 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12 |
590 | 602 |
591 patientMerge13 = merge(patient1, patient3, by="merge") | 603 patientMerge13 = merge(patient1, patient3, by="merge") |
592 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) | 604 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) |
593 patientMerge13 = patientMerge13[NULL,] | 605 patientMerge13 = patientMerge13[NULL,] |
594 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13 | 606 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13 |
595 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13 | 607 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13 |
596 | 608 |
597 patientMerge23 = merge(patient2, patient3, by="merge") | 609 patientMerge23 = merge(patient2, patient3, by="merge") |
598 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) | 610 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) |
599 patientMerge23 = patientMerge23[NULL,] | 611 patientMerge23 = patientMerge23[NULL,] |
600 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23 | 612 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23 |
601 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23 | 613 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23 |
602 | 614 |
603 merge.list = list() | 615 merge.list = list() |
604 merge.list[["second"]] = vector() | 616 merge.list[["second"]] = vector() |
605 | 617 |
606 start.time = proc.time() | 618 #print(paste(nrow(patient1), nrow(patient2), nrow(patient3), label1, label2, label3)) |
607 if(paste(label1, "123") %in% names(patient.merge.list)){ | 619 |
608 patientMerge = patient.merge.list[[paste(label1, "123")]] | 620 start.time = proc.time() |
609 patientMerge12 = patient.merge.list[[paste(label1, "12")]] | 621 if(paste(label1, "123") %in% names(patient.merge.list)){ |
610 patientMerge13 = patient.merge.list[[paste(label1, "13")]] | 622 patientMerge = patient.merge.list[[paste(label1, "123")]] |
611 patientMerge23 = patient.merge.list[[paste(label1, "23")]] | 623 patientMerge12 = patient.merge.list[[paste(label1, "12")]] |
612 | 624 patientMerge13 = patient.merge.list[[paste(label1, "13")]] |
613 merge.list[["second"]] = patient.merge.list.second[[label1]] | 625 patientMerge23 = patient.merge.list[[paste(label1, "23")]] |
614 | 626 |
615 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) | 627 #merge.list[["second"]] = patient.merge.list.second[[label1]] |
616 } else { | 628 |
617 while(nrow(patient.fuzzy) > 0){ | 629 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) |
618 first.merge = patient.fuzzy[1,"merge"] | 630 } else { |
619 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] | 631 while(nrow(patient.fuzzy) > 0){ |
620 first.sample = patient.fuzzy[1,"Sample"] | 632 first.merge = patient.fuzzy[1,"merge"] |
621 | 633 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] |
622 merge.filter = first.merge == patient.fuzzy$merge | 634 first.sample = paste(patient.fuzzy[1,"Sample"], sep="") |
623 | 635 |
624 second.sample = other.sample.list[[first.sample]][1] | 636 merge.filter = first.merge == patient.fuzzy$merge |
625 third.sample = other.sample.list[[first.sample]][2] | 637 |
626 | 638 second.sample = other.sample.list[[first.sample]][1] |
627 sample.filter.1 = first.sample == patient.fuzzy$Sample | 639 third.sample = other.sample.list[[first.sample]][2] |
628 sample.filter.2 = second.sample == patient.fuzzy$Sample | 640 |
629 sample.filter.3 = third.sample == patient.fuzzy$Sample | 641 sample.filter.1 = first.sample == patient.fuzzy$Sample |
630 | 642 sample.filter.2 = second.sample == patient.fuzzy$Sample |
631 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) | 643 sample.filter.3 = third.sample == patient.fuzzy$Sample |
632 | 644 |
633 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter | 645 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) |
634 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter | 646 |
635 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter | 647 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter |
636 | 648 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter |
637 matches.in.1 = any(match.filter.1) | 649 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter |
638 matches.in.2 = any(match.filter.2) | 650 |
639 matches.in.3 = any(match.filter.3) | 651 matches.in.1 = any(match.filter.1) |
640 | 652 matches.in.2 = any(match.filter.2) |
641 | 653 matches.in.3 = any(match.filter.3) |
642 | 654 |
643 rows.1 = patient.fuzzy[match.filter.1,] | 655 rows.1 = patient.fuzzy[match.filter.1,] |
644 | 656 |
645 sum.1 = data.frame(merge = first.clone.sequence, | 657 sum.1 = data.frame(merge = first.clone.sequence, |
646 Patient = label1, | 658 Patient = label1, |
647 Receptor = rows.1[1,"Receptor"], | 659 Receptor = rows.1[1,"Receptor"], |
648 Sample = rows.1[1,"Sample"], | 660 Sample = rows.1[1,"Sample"], |
649 Cell_Count = rows.1[1,"Cell_Count"], | 661 Cell_Count = rows.1[1,"Cell_Count"], |
650 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes), | 662 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes), |
651 Log10_Frequency = log10(sum(rows.1$Frequency)), | 663 Log10_Frequency = log10(sum(rows.1$Frequency)), |
652 Total_Read_Count = sum(rows.1$Total_Read_Count), | 664 Total_Read_Count = sum(rows.1$Total_Read_Count), |
653 dsPerM = sum(rows.1$dsPerM), | 665 dsPerM = sum(rows.1$dsPerM), |
654 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"], | 666 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"], |
655 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"], | 667 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"], |
656 Clone_Sequence = first.clone.sequence, | 668 Clone_Sequence = first.clone.sequence, |
657 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"], | 669 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"], |
658 Related_to_leukemia_clone = F, | 670 Related_to_leukemia_clone = F, |
659 Frequency = sum(rows.1$Frequency), | 671 Frequency = sum(rows.1$Frequency), |
660 locus_V = rows.1[1,"locus_V"], | 672 locus_V = rows.1[1,"locus_V"], |
661 locus_J = rows.1[1,"locus_J"], | 673 locus_J = rows.1[1,"locus_J"], |
662 uniqueID = rows.1[1,"uniqueID"], | 674 uniqueID = rows.1[1,"uniqueID"], |
663 normalized_read_count = sum(rows.1$normalized_read_count)) | 675 normalized_read_count = sum(rows.1$normalized_read_count)) |
664 sum.2 = sum.1[NULL,] | 676 sum.2 = sum.1[NULL,] |
665 rows.2 = patient.fuzzy[match.filter.2,] | 677 rows.2 = patient.fuzzy[match.filter.2,] |
666 if(matches.in.2){ | 678 if(matches.in.2){ |
667 sum.2 = data.frame(merge = first.clone.sequence, | 679 sum.2 = data.frame(merge = first.clone.sequence, |
668 Patient = label1, | 680 Patient = label1, |
669 Receptor = rows.2[1,"Receptor"], | 681 Receptor = rows.2[1,"Receptor"], |
670 Sample = rows.2[1,"Sample"], | 682 Sample = rows.2[1,"Sample"], |
671 Cell_Count = rows.2[1,"Cell_Count"], | 683 Cell_Count = rows.2[1,"Cell_Count"], |
672 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes), | 684 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes), |
673 Log10_Frequency = log10(sum(rows.2$Frequency)), | 685 Log10_Frequency = log10(sum(rows.2$Frequency)), |
674 Total_Read_Count = sum(rows.2$Total_Read_Count), | 686 Total_Read_Count = sum(rows.2$Total_Read_Count), |
675 dsPerM = sum(rows.2$dsPerM), | 687 dsPerM = sum(rows.2$dsPerM), |
676 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"], | 688 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"], |
677 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"], | 689 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"], |
678 Clone_Sequence = first.clone.sequence, | 690 Clone_Sequence = first.clone.sequence, |
679 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"], | 691 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"], |
680 Related_to_leukemia_clone = F, | 692 Related_to_leukemia_clone = F, |
681 Frequency = sum(rows.2$Frequency), | 693 Frequency = sum(rows.2$Frequency), |
682 locus_V = rows.2[1,"locus_V"], | 694 locus_V = rows.2[1,"locus_V"], |
683 locus_J = rows.2[1,"locus_J"], | 695 locus_J = rows.2[1,"locus_J"], |
684 uniqueID = rows.2[1,"uniqueID"], | 696 uniqueID = rows.2[1,"uniqueID"], |
685 normalized_read_count = sum(rows.2$normalized_read_count)) | 697 normalized_read_count = sum(rows.2$normalized_read_count)) |
686 } | 698 } |
687 | 699 |
688 sum.3 = sum.1[NULL,] | 700 sum.3 = sum.1[NULL,] |
689 rows.3 = patient.fuzzy[match.filter.3,] | 701 rows.3 = patient.fuzzy[match.filter.3,] |
690 if(matches.in.3){ | 702 if(matches.in.3){ |
691 sum.3 = data.frame(merge = first.clone.sequence, | 703 sum.3 = data.frame(merge = first.clone.sequence, |
692 Patient = label1, | 704 Patient = label1, |
693 Receptor = rows.3[1,"Receptor"], | 705 Receptor = rows.3[1,"Receptor"], |
694 Sample = rows.3[1,"Sample"], | 706 Sample = rows.3[1,"Sample"], |
695 Cell_Count = rows.3[1,"Cell_Count"], | 707 Cell_Count = rows.3[1,"Cell_Count"], |
696 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes), | 708 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes), |
697 Log10_Frequency = log10(sum(rows.3$Frequency)), | 709 Log10_Frequency = log10(sum(rows.3$Frequency)), |
698 Total_Read_Count = sum(rows.3$Total_Read_Count), | 710 Total_Read_Count = sum(rows.3$Total_Read_Count), |
699 dsPerM = sum(rows.3$dsPerM), | 711 dsPerM = sum(rows.3$dsPerM), |
700 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"], | 712 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"], |
701 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"], | 713 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"], |
702 Clone_Sequence = first.clone.sequence, | 714 Clone_Sequence = first.clone.sequence, |
703 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"], | 715 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"], |
704 Related_to_leukemia_clone = F, | 716 Related_to_leukemia_clone = F, |
705 Frequency = sum(rows.3$Frequency), | 717 Frequency = sum(rows.3$Frequency), |
706 locus_V = rows.3[1,"locus_V"], | 718 locus_V = rows.3[1,"locus_V"], |
707 locus_J = rows.3[1,"locus_J"], | 719 locus_J = rows.3[1,"locus_J"], |
708 uniqueID = rows.3[1,"uniqueID"], | 720 uniqueID = rows.3[1,"uniqueID"], |
709 normalized_read_count = sum(rows.3$normalized_read_count)) | 721 normalized_read_count = sum(rows.3$normalized_read_count)) |
710 } | 722 } |
711 | 723 |
712 if(matches.in.2 & matches.in.3){ | 724 if(matches.in.2 & matches.in.3){ |
713 merge.123 = merge(sum.1, sum.2, by="merge") | 725 merge.123 = merge(sum.1, sum.2, by="merge") |
714 merge.123 = merge(merge.123, sum.3, by="merge") | 726 merge.123 = merge(merge.123, sum.3, by="merge") |
715 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="") | 727 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="") |
716 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz]) | 728 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz]) |
717 | 729 |
718 patientMerge = rbind(patientMerge, merge.123) | 730 patientMerge = rbind(patientMerge, merge.123) |
719 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),] | 731 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),] |
720 | 732 |
721 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) | 733 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) |
722 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | 734 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) |
723 | 735 |
724 } else if (matches.in.2) { | 736 } else if (matches.in.2) { |
725 #other.sample1 = other.sample.list[[first.sample]][1] | 737 #other.sample1 = other.sample.list[[first.sample]][1] |
726 #other.sample2 = other.sample.list[[first.sample]][2] | 738 #other.sample2 = other.sample.list[[first.sample]][2] |
727 | 739 |
728 second.sample = sum.2[,"Sample"] | 740 second.sample = sum.2[,"Sample"] |
729 | 741 |
730 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]] | 742 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]] |
731 | 743 |
732 merge.12 = merge(sum.1, sum.2, by="merge") | 744 merge.12 = merge(sum.1, sum.2, by="merge") |
733 | 745 |
734 current.merge.list = rbind(current.merge.list, merge.12) | 746 current.merge.list = rbind(current.merge.list, merge.12) |
735 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list | 747 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list |
736 | 748 |
737 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),] | 749 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),] |
738 | 750 |
739 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) | 751 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) |
740 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | 752 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) |
741 | 753 |
742 } else if (matches.in.3) { | 754 } else if (matches.in.3) { |
743 | 755 |
744 #other.sample1 = other.sample.list[[first.sample]][1] | 756 #other.sample1 = other.sample.list[[first.sample]][1] |
745 #other.sample2 = other.sample.list[[first.sample]][2] | 757 #other.sample2 = other.sample.list[[first.sample]][2] |
746 | 758 |
747 second.sample = sum.3[,"Sample"] | 759 second.sample = sum.3[,"Sample"] |
748 | 760 |
749 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]] | 761 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]] |
750 | 762 |
751 merge.13 = merge(sum.1, sum.3, by="merge") | 763 merge.13 = merge(sum.1, sum.3, by="merge") |
752 | 764 |
753 current.merge.list = rbind(current.merge.list, merge.13) | 765 current.merge.list = rbind(current.merge.list, merge.13) |
754 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list | 766 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list |
755 | 767 |
756 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),] | 768 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),] |
757 | 769 |
758 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) | 770 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) |
759 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | 771 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) |
760 | 772 |
761 } else if(nrow(rows.1) > 1){ | 773 } else if(nrow(rows.1) > 1){ |
762 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),] | 774 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),] |
763 print(names(patient1)[names(patient1) %in% sum.1]) | 775 print(names(patient1)[names(patient1) %in% sum.1]) |
764 print(names(patient1)[!(names(patient1) %in% sum.1)]) | 776 print(names(patient1)[!(names(patient1) %in% sum.1)]) |
765 print(names(patient1)) | 777 print(names(patient1)) |
766 print(names(sum.1)) | 778 print(names(sum.1)) |
767 print(summary(sum.1)) | 779 print(summary(sum.1)) |
768 print(summary(patient1)) | 780 print(summary(patient1)) |
769 print(dim(sum.1)) | 781 print(dim(sum.1)) |
770 print(dim(patient1)) | 782 print(dim(patient1)) |
771 print(head(sum.1[,names(patient1)])) | 783 print(head(sum.1[,names(patient1)])) |
772 patient1 = rbind(patient1, sum.1[,names(patient1)]) | 784 patient1 = rbind(patient1, sum.1[,names(patient1)]) |
773 patient.fuzzy = patient.fuzzy[-match.filter.1,] | 785 patient.fuzzy = patient.fuzzy[-match.filter.1,] |
774 } else { | 786 } else { |
775 patient.fuzzy = patient.fuzzy[-1,] | 787 patient.fuzzy = patient.fuzzy[-1,] |
776 } | 788 } |
777 | 789 |
778 tmp.rows = rbind(rows.1, rows.2, rows.3) | 790 tmp.rows = rbind(rows.1, rows.2, rows.3) |
779 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),] | 791 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),] |
780 | 792 |
781 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) { | 793 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) { |
782 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T) | 794 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T) |
783 } else { | 795 } else { |
784 } | 796 } |
785 | 797 |
786 } | 798 } |
787 patient.merge.list[[paste(label1, "123")]] = patientMerge | 799 patient.merge.list[[paste(label1, "123")]] = patientMerge |
788 | 800 |
789 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]] | 801 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]] |
790 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]] | 802 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]] |
791 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]] | 803 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]] |
792 | 804 |
793 patient.merge.list[[paste(label1, "12")]] = patientMerge12 | 805 patient.merge.list[[paste(label1, "12")]] = patientMerge12 |
794 patient.merge.list[[paste(label1, "13")]] = patientMerge13 | 806 patient.merge.list[[paste(label1, "13")]] = patientMerge13 |
795 patient.merge.list[[paste(label1, "23")]] = patientMerge23 | 807 patient.merge.list[[paste(label1, "23")]] = patientMerge23 |
796 | 808 |
797 patient.merge.list.second[[label1]] = merge.list[["second"]] | 809 #patient.merge.list.second[[label1]] = merge.list[["second"]] |
798 } | 810 } |
799 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) | 811 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) |
800 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | 812 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) |
801 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) | 813 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) |
802 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) | 814 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) |
803 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) | 815 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) |
804 | 816 |
805 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | 817 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) |
806 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony]) | 818 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony]) |
807 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony]) | 819 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony]) |
808 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony]) | 820 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony]) |
809 | 821 |
810 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),] | 822 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),] |
811 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),] | 823 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),] |
812 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),] | 824 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),] |
813 | 825 |
814 if(F){ | 826 if(F){ |
815 patientMerge = merge(patient1, patient2, by="merge") | 827 patientMerge = merge(patient1, patient2, by="merge") |
816 patientMerge = merge(patientMerge, patient3, by="merge") | 828 patientMerge = merge(patientMerge, patient3, by="merge") |
817 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") | 829 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") |
818 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | 830 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) |
819 patientMerge12 = merge(patient1, patient2, by="merge") | 831 patientMerge12 = merge(patient1, patient2, by="merge") |
820 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) | 832 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) |
821 patientMerge13 = merge(patient1, patient3, by="merge") | 833 patientMerge13 = merge(patient1, patient3, by="merge") |
822 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) | 834 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) |
823 patientMerge23 = merge(patient2, patient3, by="merge") | 835 patientMerge23 = merge(patient2, patient3, by="merge") |
824 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) | 836 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) |
825 } | 837 } |
826 | 838 |
827 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") | 839 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") |
828 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) | 840 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) |
829 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] | 841 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] |
830 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) | 842 |
831 | 843 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) |
832 res1 = vector() | 844 |
833 res2 = vector() | 845 res1 = vector() |
834 res3 = vector() | 846 res2 = vector() |
835 res12 = vector() | 847 res3 = vector() |
836 res13 = vector() | 848 res12 = vector() |
837 res23 = vector() | 849 res13 = vector() |
838 resAll = vector() | 850 res23 = vector() |
839 read1Count = vector() | 851 resAll = vector() |
840 read2Count = vector() | 852 read1Count = vector() |
841 read3Count = vector() | 853 read2Count = vector() |
842 | 854 read3Count = vector() |
843 if(appendTriplets){ | 855 |
844 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3) | 856 if(appendTriplets){ |
845 } | 857 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3) |
846 for(iter in 1:length(product[,1])){ | 858 } |
847 threshhold = product[iter,threshholdIndex] | 859 for(iter in 1:length(product[,1])){ |
848 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 860 threshhold = product[iter,threshholdIndex] |
849 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 861 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
850 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) | 862 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
851 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) | 863 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) |
852 | 864 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) |
853 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge)) | 865 |
854 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge)) | 866 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge)) |
855 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge)) | 867 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge)) |
856 | 868 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge)) |
857 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge)) | 869 |
858 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge)) | 870 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge)) |
859 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge)) | 871 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge)) |
860 | 872 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge)) |
861 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) | 873 |
862 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) | 874 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) |
863 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) | 875 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) |
864 res1 = append(res1, sum(one)) | 876 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) |
865 res2 = append(res2, sum(two)) | 877 res1 = append(res1, sum(one)) |
866 res3 = append(res3, sum(three)) | 878 res2 = append(res2, sum(two)) |
867 resAll = append(resAll, sum(all)) | 879 res3 = append(res3, sum(three)) |
868 res12 = append(res12, sum(one_two)) | 880 resAll = append(resAll, sum(all)) |
869 res13 = append(res13, sum(one_three)) | 881 res12 = append(res12, sum(one_two)) |
870 res23 = append(res23, sum(two_three)) | 882 res13 = append(res13, sum(one_three)) |
871 #threshhold = 0 | 883 res23 = append(res23, sum(two_three)) |
872 if(threshhold != 0){ | 884 #threshhold = 0 |
873 if(sum(one) > 0){ | 885 if(threshhold != 0){ |
874 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 886 if(sum(one) > 0){ |
875 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") | 887 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] |
876 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") | 888 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") |
877 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 889 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") |
878 } | 890 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
879 if(sum(two) > 0){ | 891 } |
880 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 892 if(sum(two) > 0){ |
881 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") | 893 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] |
882 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") | 894 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") |
883 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 895 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") |
884 } | 896 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
885 if(sum(three) > 0){ | 897 } |
886 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 898 if(sum(three) > 0){ |
887 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") | 899 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] |
888 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 900 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone") |
889 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 901 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") |
890 } | 902 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
891 if(sum(one_two) > 0){ | 903 } |
892 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] | 904 if(sum(one_two) > 0){ |
893 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) | 905 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] |
894 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") | 906 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) |
895 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 907 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") |
896 } | 908 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
897 if(sum(one_three) > 0){ | 909 } |
898 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] | 910 if(sum(one_three) > 0){ |
899 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) | 911 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] |
900 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") | 912 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) |
901 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 913 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") |
902 } | 914 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
903 if(sum(two_three) > 0){ | 915 } |
904 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] | 916 if(sum(two_three) > 0){ |
905 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) | 917 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] |
906 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") | 918 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) |
907 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 919 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") |
908 } | 920 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
909 } else { #scatterplot data | 921 } |
910 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] | 922 } else { #scatterplot data |
911 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),] | 923 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] |
912 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge) | 924 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),] |
913 if(sum(in_two) > 0){ | 925 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge) |
914 scatterplot_locus_data[in_two,]$type = "In two" | 926 if(sum(in_two) > 0){ |
915 } | 927 scatterplot_locus_data[in_two,]$type = "In two" |
916 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge) | 928 } |
917 if(sum(in_three)> 0){ | 929 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge) |
918 scatterplot_locus_data[in_three,]$type = "In three" | 930 if(sum(in_three)> 0){ |
919 } | 931 scatterplot_locus_data[in_three,]$type = "In three" |
920 not_in_one = scatterplot_locus_data$type != "In one" | 932 } |
921 if(sum(not_in_one) > 0){ | 933 not_in_one = scatterplot_locus_data$type != "In one" |
922 #scatterplot_locus_data[not_in_one,]$type = "In multiple" | 934 if(sum(not_in_one) > 0){ |
923 } | 935 #scatterplot_locus_data[not_in_one,]$type = "In multiple" |
924 p = NULL | 936 } |
925 if(nrow(scatterplot_locus_data) != 0){ | 937 p = NULL |
926 if(on == "normalized_read_count"){ | 938 if(nrow(scatterplot_locus_data) != 0){ |
927 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 939 if(on == "normalized_read_count"){ |
928 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6)) | 940 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
929 } else { | 941 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales, limits=c(1, 1e6)) |
930 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100)) | 942 } else { |
931 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) | 943 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100)) |
932 } | 944 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) |
933 p = p + geom_point(aes(colour=type), position="jitter") | 945 } |
934 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) | 946 p = p + geom_point(aes(colour=type), position="jitter") |
935 } else { | 947 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) |
936 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) | 948 } else { |
937 } | 949 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) |
938 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) | 950 } |
939 print(p) | 951 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) |
940 dev.off() | 952 print(p) |
941 } | 953 dev.off() |
942 if(sum(all) > 0){ | 954 } |
943 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] | 955 if(sum(all) > 0){ |
944 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) | 956 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] |
945 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 957 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) |
946 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 958 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") |
947 } | 959 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
948 } | 960 } |
949 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) | 961 } |
950 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23) | 962 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) |
951 colnames(patientResult)[6] = oneSample | 963 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23) |
952 colnames(patientResult)[7] = twoSample | 964 colnames(patientResult)[6] = oneSample |
953 colnames(patientResult)[8] = threeSample | 965 colnames(patientResult)[7] = twoSample |
954 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_") | 966 colnames(patientResult)[8] = threeSample |
955 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_") | 967 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_") |
956 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_") | 968 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_") |
957 | 969 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_") |
958 colnamesBak = colnames(patientResult) | 970 |
959 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample)) | 971 colnamesBak = colnames(patientResult) |
960 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 972 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample)) |
961 colnames(patientResult) = colnamesBak | 973 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
962 | 974 colnames(patientResult) = colnamesBak |
963 patientResult$Locus = factor(patientResult$Locus, Titles) | 975 |
964 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep="")) | 976 patientResult$Locus = factor(patientResult$Locus, Titles) |
965 | 977 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep="")) |
966 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")]) | 978 |
967 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a") | 979 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")]) |
968 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) | 980 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a") |
969 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0) | 981 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) |
970 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All") | 982 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0) |
971 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) | 983 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All") |
972 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080) | 984 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) |
973 print(plt) | 985 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080) |
974 dev.off() | 986 print(plt) |
975 | 987 dev.off() |
976 fontSize = 4 | 988 |
977 | 989 fontSize = 4 |
978 bak = patientResult | 990 |
979 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2) | 991 bak = patientResult |
980 patientResult$relativeValue = patientResult$value * 10 | 992 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2) |
981 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1 | 993 patientResult$relativeValue = patientResult$value * 10 |
982 plt = ggplot(patientResult) | 994 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1 |
983 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge") | 995 plt = ggplot(patientResult) |
984 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) | 996 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge") |
985 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9))) | 997 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) |
986 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize) | 998 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9))) |
987 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize) | 999 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize) |
988 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize) | 1000 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize) |
989 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample") | 1001 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize) |
990 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) | 1002 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample") |
991 print(plt) | 1003 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) |
992 dev.off() | 1004 print(plt) |
1005 dev.off() | |
993 } | 1006 } |
994 | 1007 |
995 if(nrow(triplets) != 0){ | 1008 if(nrow(triplets) != 0){ |
996 | 1009 |
997 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T) | 1010 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T) |
998 | 1011 |
999 triplets$uniqueID = "ID" | 1012 triplets$uniqueID = paste(triplets$Patient, triplets$Sample, sep="_") |
1000 | 1013 |
1001 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 1014 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T) |
1002 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 1015 |
1003 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 1016 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) |
1004 | 1017 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) |
1005 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 1018 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) |
1006 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 1019 |
1007 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 1020 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) |
1008 | 1021 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J) |
1009 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" | 1022 |
1010 | 1023 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")] |
1011 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T) | 1024 |
1012 | 1025 triplets = merge(triplets, min_cell_count, by="min_cell_paste") |
1013 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) | 1026 |
1014 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) | 1027 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) |
1015 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) | 1028 |
1016 | 1029 triplets = triplets[triplets$normalized_read_count >= min_cells,] |
1017 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) | 1030 |
1018 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J) | 1031 column_drops = c("min_cell_count", "min_cell_paste") |
1019 | 1032 |
1020 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")] | 1033 triplets = triplets[,!(colnames(triplets) %in% column_drops)] |
1021 | 1034 |
1022 triplets = merge(triplets, min_cell_count, by="min_cell_paste") | 1035 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) |
1023 | 1036 |
1024 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2 | 1037 interval = intervalReads |
1025 | 1038 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
1026 triplets = triplets[triplets$normalized_read_count >= min_cells,] | 1039 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
1027 | 1040 |
1028 column_drops = c("min_cell_count", "min_cell_paste") | 1041 triplets = split(triplets, triplets$Patient, drop=T) |
1029 | 1042 print(nrow(triplets)) |
1030 triplets = triplets[,!(colnames(triplets) %in% column_drops)] | 1043 for(triplet in triplets){ |
1031 | 1044 samples = unique(triplet$Sample) |
1032 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) | 1045 one = triplet[triplet$Sample == samples[1],] |
1033 | 1046 two = triplet[triplet$Sample == samples[2],] |
1034 interval = intervalReads | 1047 three = triplet[triplet$Sample == samples[3],] |
1035 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 1048 |
1036 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 1049 print(paste(nrow(triplet), nrow(one), nrow(two), nrow(three))) |
1037 | 1050 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="normalized_read_count", T) |
1038 one = triplets[triplets$Sample == "14696_reg_BM",] | 1051 } |
1039 two = triplets[triplets$Sample == "24536_reg_BM",] | 1052 |
1040 three = triplets[triplets$Sample == "24062_reg_BM",] | 1053 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) |
1041 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T) | 1054 |
1042 | 1055 interval = intervalFreq |
1043 one = triplets[triplets$Sample == "16278_Left",] | 1056 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
1044 two = triplets[triplets$Sample == "26402_Left",] | 1057 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
1045 three = triplets[triplets$Sample == "26759_Left",] | 1058 |
1046 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T) | 1059 for(triplet in triplets){ |
1047 | 1060 samples = unique(triplet$Sample) |
1048 one = triplets[triplets$Sample == "16278_Right",] | 1061 one = triplet[triplet$Sample == samples[1],] |
1049 two = triplets[triplets$Sample == "26402_Right",] | 1062 two = triplet[triplet$Sample == samples[2],] |
1050 three = triplets[triplets$Sample == "26759_Right",] | 1063 three = triplet[triplet$Sample == samples[3],] |
1051 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T) | 1064 tripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="Frequency", F) |
1052 | 1065 } |
1053 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) | |
1054 | |
1055 interval = intervalFreq | |
1056 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | |
1057 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | |
1058 | |
1059 one = triplets[triplets$Sample == "14696_reg_BM",] | |
1060 two = triplets[triplets$Sample == "24536_reg_BM",] | |
1061 three = triplets[triplets$Sample == "24062_reg_BM",] | |
1062 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F) | |
1063 | |
1064 one = triplets[triplets$Sample == "16278_Left",] | |
1065 two = triplets[triplets$Sample == "26402_Left",] | |
1066 three = triplets[triplets$Sample == "26759_Left",] | |
1067 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F) | |
1068 | |
1069 one = triplets[triplets$Sample == "16278_Right",] | |
1070 two = triplets[triplets$Sample == "26402_Right",] | |
1071 three = triplets[triplets$Sample == "26759_Right",] | |
1072 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F) | |
1073 } else { | 1066 } else { |
1074 cat("", file="triplets.txt") | 1067 cat("", file="triplets.txt") |
1075 } | 1068 } |
1076 cat("</table></html>", file=logfile, append=T) | 1069 cat("</table></html>", file=logfile, append=T) |