comparison RScript.r @ 8:eb2aa7cffca3 draft default tip

Uploaded
author davidvanzessen
date Wed, 13 Dec 2017 06:00:44 -0500
parents 7ce82833977c
children
comparison
equal deleted inserted replaced
7:7ce82833977c 8:eb2aa7cffca3
418 p = p + geom_point(aes(colour=type), position="dodge") 418 p = p + geom_point(aes(colour=type), position="dodge")
419 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"])) 419 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"]))
420 } else { 420 } else {
421 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"])) 421 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,"Patient"], patient1[1,"Sample"], patient2[1,"Sample"], onShort, product[iter, "Titles"]))
422 } 422 }
423 png(paste(patient1[1,"Patient"], "_", patient1[1,"Sample"], "_", patient2[1,"Sample"], "_", onShort, "_", product[iter, "Titles"],"_scatter.png", sep="")) 423 file_name = paste(patient1[1,"Patient"], "_", patient1[1,"Sample"], "_", patient2[1,"Sample"], "_", onShort, "_", product[iter, "Titles"],"_scatter.png", sep="")
424 print(paste("Writing figure:", file_name))
425 png(file_name)
424 print(p) 426 print(p)
425 dev.off() 427 dev.off()
426 if(sum(both) > 0){ 428 if(sum(both) > 0){
427 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] 429 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
428 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) 430 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
448 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a") 450 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
449 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) 451 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
450 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0) 452 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
451 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both") 453 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
452 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) 454 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
453 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080) 455 file_name = paste(patient, "_", onShort, ".png", sep="")
456 print(paste("Writing figure:", file_name))
457 png(file_name, width=1920, height=1080)
454 print(plt) 458 print(plt)
455 dev.off() 459 dev.off()
456 #(t,r,b,l) 460 #(t,r,b,l)
457 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")]) 461 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
458 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a") 462 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
459 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) 463 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
460 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0) 464 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
461 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right") 465 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
462 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) 466 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
463 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080) 467
468 file_name = paste(patient, "_percent_", onShort, ".png", sep="")
469 print(paste("Writing figure:", file_name))
470 png(file_name, width=1920, height=1080)
464 print(plt) 471 print(plt)
465 dev.off() 472 dev.off()
466 473
467 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2) 474 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
468 patientResult$relativeValue = patientResult$value * 10 475 patientResult$relativeValue = patientResult$value * 10
472 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) 479 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
473 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9))) 480 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
474 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2) 481 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
475 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8) 482 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
476 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep="")) 483 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
477 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080) 484 file_name = paste(patient, "_", onShort, "_both.png", sep="")
485 print(paste("Writing figure:", file_name))
486 png(file_name, width=1920, height=1080)
478 print(plt) 487 print(plt)
479 dev.off() 488 dev.off()
480 } 489 }
481 490
482 if(length(patients) > 0){ 491 if(length(patients) > 0){
504 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) 513 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
505 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000)) 514 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
506 p = p + geom_point(aes(colour=type), position="jitter") 515 p = p + geom_point(aes(colour=type), position="jitter")
507 p = p + xlab("In one or both samples") + ylab("Reads") 516 p = p + xlab("In one or both samples") + ylab("Reads")
508 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") 517 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
509 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) 518 file_name = "singles_reads_scatterplot.png"
519 print(paste("Writing figure:", file_name))
520 png(file_name, width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
510 print(p) 521 print(p)
511 dev.off() 522 dev.off()
512 523
513 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) 524 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
514 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100)) 525 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
515 p = p + geom_point(aes(colour=type), position="jitter") 526 p = p + geom_point(aes(colour=type), position="jitter")
516 p = p + xlab("In one or both samples") + ylab("Frequency") 527 p = p + xlab("In one or both samples") + ylab("Frequency")
517 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") 528 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
518 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) 529 file_name = "singles_freq_scatterplot.png"
530 print(paste("Writing figure:", file_name))
531 png(file_name, width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
519 print(p) 532 print(p)
520 dev.off() 533 dev.off()
521 } else { 534 } else {
522 empty <- data.frame() 535 empty <- data.frame()
523 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample") 536 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
946 p = p + geom_point(aes(colour=type), position="jitter") 959 p = p + geom_point(aes(colour=type), position="jitter")
947 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) 960 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
948 } else { 961 } else {
949 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) 962 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
950 } 963 }
951 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) 964 file_name = paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")
965 print(paste("Writing figure:", file_name))
966 png(file_name)
952 print(p) 967 print(p)
953 dev.off() 968 dev.off()
954 } 969 }
955 if(sum(all) > 0){ 970 if(sum(all) > 0){
956 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] 971 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
980 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a") 995 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
981 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) 996 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
982 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0) 997 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
983 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All") 998 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
984 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) 999 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
985 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080) 1000 file_name = paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep="")
1001 print(paste("Writing figure:", file_name))
1002 png(file_name, width=1920, height=1080)
986 print(plt) 1003 print(plt)
987 dev.off() 1004 dev.off()
988 1005
989 fontSize = 4 1006 fontSize = 4
990 1007
998 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9))) 1015 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
999 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize) 1016 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
1000 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize) 1017 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
1001 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize) 1018 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
1002 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample") 1019 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
1003 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) 1020 file_name = paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep="")
1021 print(paste("Writing figure:", file_name))
1022 png(file_name, width=1920, height=1080)
1004 print(plt) 1023 print(plt)
1005 dev.off() 1024 dev.off()
1006 } 1025 }
1007 1026
1008 if(nrow(triplets) != 0){ 1027 if(nrow(triplets) != 0){