# HG changeset patch
# User davidvanzessen
# Date 1491912137 14400
# Node ID 77a7ac76c7b95f4522c7fb2717abc18ea91788e2
# Parent 1cf60ae234b4e39fed603282f5f53d398a7f42e5
Uploaded
diff -r 1cf60ae234b4 -r 77a7ac76c7b9 plot_pdf.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plot_pdf.r Tue Apr 11 08:02:17 2017 -0400
@@ -0,0 +1,32 @@
+# Render the ggplot objects stored in an .RData file as PDF files.
+#
+# Usage: Rscript plot_pdf.r <input.RData> <outputdir>
+#   input.RData  file written by save() holding `pdfplots`, a named list of
+#                plots; each name is used as the output file name
+#   outputdir    directory the PDF files are written to
+library(ggplot2)
+
+args <- commandArgs(trailingOnly = TRUE)
+print(args)
+
+if (length(args) < 2) {
+  stop("usage: Rscript plot_pdf.r <input.RData> <outputdir>", call. = FALSE)
+}
+
+input <- args[1]
+outputdir <- args[2]
+
+# Load without changing the working directory first, so a relative input path
+# is resolved against the caller's directory rather than against outputdir.
+load(input)
+if (!exists("pdfplots")) {
+  stop("'", input, "' does not contain a 'pdfplots' object", call. = FALSE)
+}
+
+print(names(pdfplots))
+
+for (n in names(pdfplots)) {
+  print(paste("n:", n))
+  # ggsave()'s first argument is the file name, so pass both by name.
+  ggsave(filename = file.path(outputdir, n), plot = pdfplots[[n]], device = "pdf")
+}
diff -r 1cf60ae234b4 -r 77a7ac76c7b9 shm_csr.py
--- a/shm_csr.py Tue Mar 28 08:25:36 2017 -0400
+++ b/shm_csr.py Tue Apr 11 08:02:17 2017 -0400
@@ -80,7 +80,7 @@
IDlist += [ID]
-print mutationList, linecount
+#print mutationList, linecount
AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent
if AALength < 60:
diff -r 1cf60ae234b4 -r 77a7ac76c7b9 shm_csr.r
--- a/shm_csr.r Tue Mar 28 08:25:36 2017 -0400
+++ b/shm_csr.r Tue Apr 11 08:02:17 2017 -0400
@@ -124,6 +124,8 @@
regions = c("CDR2", "FR3")
}
+pdfplots = list() # plots keyed by PDF file name; save()d to pdfplots.RData below so a separate script can write the PDFs (maybe avoids the "address (nil), cause memory not mapped" error)
+
sum_by_row = function(x, columns) { sum(as.numeric(x[columns]), na.rm=T) }
print("aggregating data into new columns")
@@ -308,7 +310,7 @@
print(p)
dev.off()
- ggsave(paste("transitions_stacked_", name, ".pdf", sep=""))
+ pdfplots[[paste("transitions_stacked_", name, ".pdf", sep="")]] <<- p
png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
p = ggplot(transition2, aes(factor(reorder(variable, -order.y)), factor(reorder(id, -order.x)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap
@@ -316,7 +318,7 @@
print(p)
dev.off()
- ggsave(paste("transitions_heatmap_", name, ".pdf", sep=""))
+ pdfplots[[paste("transitions_heatmap_", name, ".pdf", sep="")]] <<- p
} else {
#print("No data to plot")
}
@@ -400,7 +402,7 @@
print(pc)
dev.off()
- ggsave("IGA.pdf", pc)
+ pdfplots[["IGA.pdf"]] <- pc
}
print("Plotting IGG piechart")
@@ -423,7 +425,7 @@
print(pc)
dev.off()
- ggsave("IGG.pdf", pc)
+ pdfplots[["IGG.pdf"]] <- pc
}
print("Plotting scatterplot")
@@ -445,7 +447,7 @@
print(p)
dev.off()
-ggsave("scatter.pdf", p)
+pdfplots[["scatter.pdf"]] <- p
write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
@@ -471,7 +473,9 @@
print(p)
dev.off()
-ggsave("frequency_ranges.pdf", p)
+pdfplots[["frequency_ranges.pdf"]] <- p
+
+save(pdfplots, file="pdfplots.RData")
frequency_bins_data_by_class = frequency_bins_data
diff -r 1cf60ae234b4 -r 77a7ac76c7b9 wrapper.sh
--- a/wrapper.sh Tue Mar 28 08:25:36 2017 -0400
+++ b/wrapper.sh Tue Apr 11 08:02:17 2017 -0400
@@ -176,6 +176,13 @@
echo "R mutation analysis"
Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1
+echo "---------------- plot_pdf.r ----------------"
+echo "---------------- plot_pdf.r ----------------
" >> $log
+# Render the plots saved in pdfplots.RData to PDF files in a separate R process.
+echo "Rscript $dir/plot_pdf.r $outdir/pdfplots.RData $outdir 2>&1"
+
+Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" 2>&1
+
echo "---------------- shm_csr.py ----------------"
echo "---------------- shm_csr.py ----------------
" >> $log
@@ -249,7 +256,7 @@
echo "---------------- pattern_plots.r ----------------"
echo "---------------- pattern_plots.r ----------------
" >> $log
- Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/abolute_mutations $outdir/shm_overview.txt 2>&1
+ Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/absolute_mutations $outdir/shm_overview.txt 2>&1
echo "
info | " >> $output @@ -300,7 +307,7 @@ echo "|
---|---|
Base count for every sequence | View |
The data used to generate the percentage of mutations in AID and pol eta motives plot | Download |
The data used to generate the relative mutation patterns plot | Download |
The data used to generate the absolute mutation patterns plot | Download |
The data used to generate the absolute mutation patterns plot | Download |
SHM Frequency | |
The data generate the frequency scatter plot | Download |