Mercurial > repos > ynewton > plot_distribution

--- a/plot_distribution.r	Thu Dec 13 11:25:25 2012 -0500
+++ b/plot_distribution.r	Fri Jan 18 12:18:04 2013 -0500
@@ -1,29 +1,14 @@
 #!/usr/bin/Rscript

 #usage, options and doc goes here
-argspec <- c("normalize.r - takes any flat file and normalizes the rows or the columns using various normalizations (median_shift, mean_shift, t_statistic (z-score), exp_fit, normal_fit, weibull_0.5_fit, weibull_1_fit, weibull_1.5_fit, weibull_5_fit). Requires a single header line and a single cloumn of annotation.
+argspec <- c("plot_distribution.r - plots distribution of the value in the list or the matrix. Assumes the first line and the first column are annotations.
 Usage:
-    normalize.r input.tab norm_type norm_by > output.tab
+    plot_distribution.r input_matrix.tab output_file.pdf
 Example:
-	Rscript normalize.r test_matrix.tab median_shift column > output.tab
-	Rscript normalize.r test_matrix.tab mean_shift row normals.tab > output.tab
+	Rscript plot_distribution.r input_matrix.tab output_file.pdf
 Options:
-	input matrix (annotated by row and column names)
-	normalization type; available options:
-		median_shift - shifts all values by the median or the row/column if no normals are specified, otherwise shifts by the median of normals
-		mean_shift - shifts all values by the mean or the row/column if no normals are specified, otherwise shifts by the mean of normals
-		t_statistic - converts all values to z-scores; if normals are specified then converts to z-scores within normal and non-normal classes separately
-		exp_fit - (only by column) ranks data and transforms exponential CDF
-		normal_fit - (only by column) ranks data and transforms normal CDF
-		weibull_0.5_fit - (only by column) ranks data and transforms Weibull CDF with scale parameter = 1 and shape parameter = 0.5
-		weibull_1_fit - (only by column) ranks data and transforms Weibull CDF with scale parameter = 1 and shape parameter = 1
-		weibull_1.5_fit - (only by column) ranks data and transforms Weibull CDF with scale parameter = 1 and shape parameter = 1.5
-		weibull_5_fit - (only by column) ranks data and transforms Weibull CDF with scale parameter = 1 and shape parameter = 5
-	normalization by:
-		row
-		column
-	normals_file is an optional parameter which contains a list of column headers from the input matrix, which should be considered as normals
-	output file is specified through redirect character >")
+	input file name
+	output file name")

 read_matrix <- function(in_file){
 	header <- strsplit(readLines(con=in_file, n=1), "\t")[[1]]
@@ -40,12 +25,15 @@
 	sink('/dev/null')

 	input_data <- read_matrix(in_file)
+	input_data.df <- as.data.frame(input_data)
+	input_data.lst <- as.list(input_data.df)
+	input_data.unlst <- unlist(input_data.lst)
+	input_data.nona <- input_data.unlst[!is.na(input_data.unlst)]

 	pdf(out_file, bg="white")
 	par(mfrow=c(1,1))
-	hist(input_data, col="lightblue", labels=TRUE, main="Histogram", xlab="")
-	plot(density(input_data), type="l", col="blue", main="Density")
-	dev.off()
+	hist(input_data.nona, col="lightblue", labels=TRUE, main="Histogram", xlab="")
+	plot(density(input_data.nona), type="l", col="blue", main="Density")
+	dev.off()
 }
-
 main(commandArgs(TRUE))
\ No newline at end of file