Mercurial > repos > ynewton > plot_distribution
view plot_distribution.r @ 1:81d08949af15 draft
Uploaded
author | ynewton |
---|---|
date | Thu, 13 Dec 2012 11:25:25 -0500 |
parents | f91478b63ec6 |
children | cf8d0d54bc78 |
line wrap: on
line source
#!/usr/bin/Rscript

# Usage, options and documentation for this script. The text is also appended
# to the error raised by main() when the command-line arguments are missing.
argspec <- c("plot_distribution.r - takes a tab-delimited matrix and plots the distribution of all of its values as a histogram and as a kernel density estimate. Requires a single header line and a single column of annotation.

Usage:
    plot_distribution.r input.tab output.pdf
Example:
    Rscript plot_distribution.r test_matrix.tab distribution.pdf
Options:
    input matrix (annotated by row and column names)
    output file is the path of the PDF that receives both plots")

# Read a tab-delimited file into a matrix.
#
# The first line of the file is the header and the first column holds the row
# annotation. The first column becomes the row names of the result; all
# remaining columns become the matrix body, keeping their header names exactly
# as written in the file (check.names = FALSE). The literal string "NA" is read
# as a missing value.
#
# in_file: path of the tab-delimited file to read.
# Returns: a matrix with one row per data line and one column per data column.
#          The storage mode is whatever as.matrix() derives from the columns,
#          so it is only numeric when every data column parsed as a number.
read_matrix <- function(in_file) {
  table_df <- read.delim(in_file, header = TRUE, row.names = NULL,
                         stringsAsFactors = FALSE, na.strings = "NA",
                         check.names = FALSE)
  # drop = FALSE keeps a data frame (and therefore the column name) even when
  # the file has a single data column.
  data_matrix <- as.matrix(table_df[, -1, drop = FALSE])
  rownames(data_matrix) <- table_df[, 1]
  data_matrix
}

# Plot the value distribution of a matrix file into a PDF.
#
# argv: character vector of command-line arguments; argv[1] is the input
#       matrix file and argv[2] is the PDF file to create.
# Returns: (invisibly) the value of dev.off() for the PDF device.
# Errors:  when fewer than two arguments are given, when the matrix body is
#          not numeric, or when it holds fewer than two finite values (the
#          density estimate cannot be computed in that case).
main <- function(argv) {
  if (length(argv) < 2L) {
    stop("expected 2 arguments (input matrix, output PDF), got ",
         length(argv), "\n\n", argspec, call. = FALSE)
  }
  in_file <- argv[1]
  out_file <- argv[2]

  # Standard output is silenced while the plots are produced. The sink is
  # removed again on exit so it does not outlive this call, and the null
  # device is chosen per platform because "/dev/null" does not exist on
  # Windows.
  null_device <- if (.Platform$OS.type == "windows") "NUL" else "/dev/null"
  sink(null_device)
  on.exit(sink(), add = TRUE)

  input_data <- read_matrix(in_file)
  if (!is.numeric(input_data)) {
    stop("input matrix '", in_file, "' has no numeric data to plot ",
         "(no data columns, or non-numeric values present)", call. = FALSE)
  }
  if (sum(is.finite(input_data)) < 2L) {
    stop("input matrix '", in_file, "' needs at least two finite values ",
         "to estimate a density", call. = FALSE)
  }

  pdf(out_file, bg = "white")
  pdf_device <- dev.cur()
  device_closed <- FALSE
  # Make sure the PDF device is closed even when one of the plots fails.
  on.exit(if (!device_closed) dev.off(pdf_device), add = TRUE)

  par(mfrow = c(1, 1))
  hist(input_data, col = "lightblue", labels = TRUE, main = "Histogram",
       xlab = "")
  # hist() silently ignores missing values but density() rejects them unless
  # na.rm = TRUE is given.
  plot(density(input_data, na.rm = TRUE), type = "l", col = "blue",
       main = "Density")

  device_status <- dev.off(pdf_device)
  device_closed <- TRUE
  # Invisible so the top-level call does not print the device status now that
  # the sink is no longer active when main() returns.
  invisible(device_status)
}

main(commandArgs(TRUE))