# HG changeset patch
# User ynewton
# Date 1355415909 18000
# Node ID f91478b63ec6d7bc7a7f6b16ec6df71e53544e29

Uploaded

diff -r 000000000000 -r f91478b63ec6 plot_distribution.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plot_distribution.r	Thu Dec 13 11:25:09 2012 -0500
@@ -0,0 +1,51 @@
+#!/usr/bin/Rscript
+
+# Usage text for this script; it is only stored in argspec, nothing in this file prints or parses it.
+argspec <- c("plot_distribution.r - takes a tab-delimited matrix and plots the distribution of all of its values (a histogram and a kernel density estimate) into a PDF file. Requires a single header line and a single column of annotation.
+Usage:
+    plot_distribution.r input.tab output.pdf
+Example:
+	Rscript plot_distribution.r test_matrix.tab distribution.pdf
+Options:
+	input.tab - input matrix (annotated by row and column names)
+		tab-delimited text
+		the first line is a header holding the column names
+		the first column holds the row names and is not plotted
+		every other column is expected to hold numeric values
+		missing values, if any, are written as NA
+	output.pdf - path of the PDF file to create
+Output:
+	the values of all rows and columns are pooled into a single sample
+	page 1 - histogram of the pooled values, each bar labelled with its count
+	page 2 - kernel density estimate of the pooled values
+Notes:
+	both arguments are positional and both are required
+	an existing file at output.pdf is overwritten
+	text that would go to standard output is discarded while the script runs
+	the plots are written to output.pdf only
+	the redirect character > is not used by this script")
+
+# Read a tab-delimited file with one header line into a matrix: column 1 becomes the row names, every other column is data.
+read_matrix <- function(in_file){
+	# row.names=NULL leaves the labels in column 1 as ordinary data, so repeated labels do not make read.delim() fail.
+	data_matrix.df <- read.delim(in_file, header=TRUE, row.names=NULL, stringsAsFactors=FALSE, na.strings="NA", check.names=FALSE)
+	data_matrix <- as.matrix(data_matrix.df[, -1, drop=FALSE])	# by position, no second read of the header; drop=FALSE keeps a lone data column as a named one-column matrix
+	rownames(data_matrix) <- data_matrix.df[,1]
+	return(data_matrix)
+}
+
+# Entry point. argv[1]: tab-delimited input matrix; argv[2]: PDF to create (page 1 histogram, page 2 density of all values).
+main <- function(argv) {
+	if (length(argv) < 2) stop("usage: plot_distribution.r input.tab output.pdf", call.=FALSE)
+	sink('/dev/null')	# discard whatever gets printed to standard output while the plots are made
+	on.exit(sink(), add=TRUE)	# hand standard output back on every exit path
+	input_data <- read_matrix(argv[1])
+	pdf(argv[2], bg="white")
+	on.exit(dev.off(), add=TRUE)	# close the PDF device even when a plotting call fails
+	par(mfrow=c(1,1))
+	hist(input_data, col="lightblue", labels=TRUE, main="Histogram", xlab="")
+	plot(density(input_data, na.rm=TRUE), type="l", col="blue", main="Density")	# density() stops on NA unless na.rm=TRUE; hist() already skips them
+	invisible(NULL)	# nothing useful to return; stay silent now that the sink is lifted on exit
+}
+
+main(commandArgs(trailingOnly=TRUE))	# run with only the arguments that follow the script name
\ No newline at end of file