Mercurial > repos > prog > lcmsmatching

diff excelhlp.R @ 0:e66bb061af06 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author: prog
date: Tue, 12 Jul 2016 12:02:37 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/excelhlp.R	Tue Jul 12 12:02:37 2016 -0400
@@ -0,0 +1,83 @@
+if ( ! exists('read.excel')) { # Do not load again if already loaded
+	
+	source('strhlp.R')
+	source('dfhlp.R')
+	
+	###############
+	# GET NB ROWS #
+	###############
+	
+	get.nbrows <- function(file, tab) {
+	
+		library(rJava)
+		library(xlsxjars)
+		library(xlsx, quietly = TRUE)
+	
+		df <- read.xlsx(file, tab)
+		na_rows <- apply(is.na(df), MARGIN = 1, FUN = all) # look for rows that contain only NA values.
+		last_row <- tail(which(! na_rows), n = 1)
+		return(last_row)
+	}
+
+	##############
+	# READ EXCEL #
+	##############
+	
+	# Read Excel xlsx file
+	# file                  The path to the Excel file.
+	# sheet
+	# start.row
+	# end.row
+	# header                If TRUE, use first line as header line.
+	# check.names           If TRUE, correct header (column) names in the data frame, by replacing non-ASCII characters by dot.
+	# stringsAsFactors      If TRUE, replace string values by factors.
+	# trim.header           If TRUE, remove whitespaces at beginning and of header titles.
+	# trim.values           If TRUE, remove whitespaces at beginning and of string values.
+	# remove.na.rows        If TRUE, remove all lines that contain only NA values.
+	read.excel <- function(file, sheet, start.row = NULL, end.row = NULL, header = TRUE, remove.na.rows = TRUE, check.names = TRUE, stringsAsFactors = TRUE, trim.header = FALSE, trim.values = FALSE, col.index = NULL) {
+	
+		library(rJava)
+		library(xlsxjars)
+		library(xlsx, quietly = TRUE)
+	
+		# Check that start row and end row exist
+		if ( ! is.null(start.row) || ! is.null(end.row)) {
+			nb_rows <- get.nbrows(file, sheet)
+			if ( ! is.null(start.row) && start.row > nb_rows)
+				return(NULL)
+			if ( ! is.null(end.row) && end.row > nb_rows)
+				return(NULL)
+		}
+	
+		# Call xlsx package
+		df <- read.xlsx(file, sheet, startRow = start.row, endRow = end.row, header = header, check.names = check.names, stringsAsFactors = stringsAsFactors, colIndex = col.index)
+
+		# Remove column default names if header was set to false
+		if ( ! header)
+			colnames(df) <- NULL
+
+		# Clean data frame
+		df <- df.clean(df, trim.colnames = trim.header, trim.values = trim.values, remove.na.rows = remove.na.rows)
+	
+		return(df)
+	}
+	
+	#######################
+	# CHECK IF TAB EXISTS #
+	#######################
+	
+	tab.exists <- function(file, tab) {
+	
+		if (is.null(file) || is.na(file) || is.null(tab) || is.na(tab))
+			return(FALSE)
+	
+		library(rJava)
+		library(xlsxjars)
+		library(xlsx, quietly = TRUE)
+
+		wb <- loadWorkbook(file)
+		sheets <- getSheets(wb)
+		return(tab %in% names(sheets))
+	}
+
+} # end of load safe guard
author	prog
date	Tue, 12 Jul 2016 12:02:37 -0400
parents
children