Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 18:5d11c9139a55 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 21 Dec 2016 11:53:03 -0500 |
| parents | da95be204ebc |
| children | 9185c3dfc679 |
comparison
equal
deleted
inserted
replaced
| 17:da95be204ebc | 18:5d11c9139a55 |
|---|---|
| 157 print(sample.colors) | 157 print(sample.colors) |
| 158 | 158 |
| 159 | 159 |
| 160 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive | 160 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive |
| 161 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) | 161 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 162 write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) | 162 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) |
| 163 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) | 163 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) |
| 164 | 164 |
| 165 #write the samples to a file | 165 #write the samples to a file |
| 166 sampleFile <- file("samples.txt") | 166 sampleFile <- file("samples.txt") |
| 167 un = unique(inputdata$Sample) | 167 un = unique(inputdata$Sample) |
| 295 | 295 |
| 296 pV = ggplot(PRODFV) | 296 pV = ggplot(PRODFV) |
| 297 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 297 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 298 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") + scale_fill_manual(values=sample.colors) | 298 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") + scale_fill_manual(values=sample.colors) |
| 299 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 299 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 300 write.table(x=PRODFV, file="VFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 300 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 301 | 301 |
| 302 png("VPlot.png",width = 1280, height = 720) | 302 png("VPlot.png",width = 1280, height = 720) |
| 303 pV | 303 pV |
| 304 dev.off(); | 304 dev.off(); |
| 305 | 305 |
| 306 if(useD){ | 306 if(useD){ |
| 307 pD = ggplot(PRODFD) | 307 pD = ggplot(PRODFD) |
| 308 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 308 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 309 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) | 309 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) |
| 310 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 310 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 311 write.table(x=PRODFD, file="DFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 311 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 312 | 312 |
| 313 png("DPlot.png",width = 800, height = 600) | 313 png("DPlot.png",width = 800, height = 600) |
| 314 print(pD) | 314 print(pD) |
| 315 dev.off(); | 315 dev.off(); |
| 316 } | 316 } |
| 317 | 317 |
| 318 pJ = ggplot(PRODFJ) | 318 pJ = ggplot(PRODFJ) |
| 319 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 319 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 320 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) | 320 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) |
| 321 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 321 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 322 write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 322 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 323 | 323 |
| 324 png("JPlot.png",width = 800, height = 600) | 324 png("JPlot.png",width = 800, height = 600) |
| 325 pJ | 325 pJ |
| 326 dev.off(); | 326 dev.off(); |
| 327 | 327 |
| 342 scale_fill_manual(values=sample.colors) + | 342 scale_fill_manual(values=sample.colors) + |
| 343 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 343 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 344 png("VFPlot.png") | 344 png("VFPlot.png") |
| 345 VPlot | 345 VPlot |
| 346 dev.off(); | 346 dev.off(); |
| 347 write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 347 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 348 | 348 |
| 349 if(useD){ | 349 if(useD){ |
| 350 DGenes = PRODF[,c("Sample", "Top.D.Gene")] | 350 DGenes = PRODF[,c("Sample", "Top.D.Gene")] |
| 351 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) | 351 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) |
| 352 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) | 352 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) |
| 360 scale_fill_manual(values=sample.colors) + | 360 scale_fill_manual(values=sample.colors) + |
| 361 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 361 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 362 png("DFPlot.png") | 362 png("DFPlot.png") |
| 363 print(DPlot) | 363 print(DPlot) |
| 364 dev.off(); | 364 dev.off(); |
| 365 write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 365 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 366 } | 366 } |
| 367 | 367 |
| 368 # ---------------------- Plotting the cdr3 length ---------------------- | 368 # ---------------------- Plotting the cdr3 length ---------------------- |
| 369 | 369 |
| 370 print("Report Clonality - CDR3 length plot") | 370 print("Report Clonality - CDR3 length plot") |
| 577 # ---------------------- calculating the clonality score ---------------------- | 577 # ---------------------- calculating the clonality score ---------------------- |
| 578 | 578 |
| 579 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available | 579 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available |
| 580 { | 580 { |
| 581 print("Report Clonality - Clonality") | 581 print("Report Clonality - Clonality") |
| 582 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) | 582 write.table(clonalityFrame, "clonalityComplete.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 583 if(clonality_method == "boyd"){ | 583 if(clonality_method == "boyd"){ |
| 584 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) | 584 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) |
| 585 | 585 |
| 586 for (sample in samples){ | 586 for (sample in samples){ |
| 587 res = data.frame(paste=character(0)) | 587 res = data.frame(paste=character(0)) |
| 869 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 869 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 870 | 870 |
| 871 png("AAComposition.png",width = 1280, height = 720) | 871 png("AAComposition.png",width = 1280, height = 720) |
| 872 AAfreqplot | 872 AAfreqplot |
| 873 dev.off() | 873 dev.off() |
| 874 write.table(AAfreq, "AAComposition.csv" , sep=",",quote=F,na="-",row.names=F,col.names=T) | 874 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
| 875 | 875 |
| 876 # ---------------------- AA median CDR3 length ---------------------- | 876 # ---------------------- AA median CDR3 length ---------------------- |
| 877 | 877 |
| 878 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")]) | 878 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")]) |
| 879 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 879 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
