Mercurial > repos > prog > lcmsmatching
diff excelhlp.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author | prog |
---|---|
date | Fri, 22 Feb 2019 16:04:22 -0500 |
parents | fb9c0409d85c |
children |
line wrap: on
line diff
--- a/excelhlp.R Wed Apr 19 10:00:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,83 +0,0 @@ -if ( ! exists('read.excel')) { # Do not load again if already loaded - - source('strhlp.R') - source('dfhlp.R') - - ############### - # GET NB ROWS # - ############### - - get.nbrows <- function(file, tab) { - - library(rJava) - library(xlsxjars) - library(xlsx, quietly = TRUE) - - df <- read.xlsx(file, tab) - na_rows <- apply(is.na(df), MARGIN = 1, FUN = all) # look for rows that contain only NA values. - last_row <- tail(which(! na_rows), n = 1) - return(last_row) - } - - ############## - # READ EXCEL # - ############## - - # Read Excel xlsx file - # file The path to the Excel file. - # sheet - # start.row - # end.row - # header If TRUE, use first line as header line. - # check.names If TRUE, correct header (column) names in the data frame, by replacing non-ASCII characters by dot. - # stringsAsFactors If TRUE, replace string values by factors. - # trim.header If TRUE, remove whitespaces at beginning and of header titles. - # trim.values If TRUE, remove whitespaces at beginning and of string values. - # remove.na.rows If TRUE, remove all lines that contain only NA values. - read.excel <- function(file, sheet, start.row = NULL, end.row = NULL, header = TRUE, remove.na.rows = TRUE, check.names = TRUE, stringsAsFactors = TRUE, trim.header = FALSE, trim.values = FALSE, col.index = NULL) { - - library(rJava) - library(xlsxjars) - library(xlsx, quietly = TRUE) - - # Check that start row and end row exist - if ( ! is.null(start.row) || ! is.null(end.row)) { - nb_rows <- get.nbrows(file, sheet) - if ( ! is.null(start.row) && start.row > nb_rows) - return(NULL) - if ( ! is.null(end.row) && end.row > nb_rows) - return(NULL) - } - - # Call xlsx package - df <- read.xlsx(file, sheet, startRow = start.row, endRow = end.row, header = header, check.names = check.names, stringsAsFactors = stringsAsFactors, colIndex = col.index) - - # Remove column default names if header was set to false - if ( ! header) - colnames(df) <- NULL - - # Clean data frame - df <- df.clean(df, trim.colnames = trim.header, trim.values = trim.values, remove.na.rows = remove.na.rows) - - return(df) - } - - ####################### - # CHECK IF TAB EXISTS # - ####################### - - tab.exists <- function(file, tab) { - - if (is.null(file) || is.na(file) || is.null(tab) || is.na(tab)) - return(FALSE) - - library(rJava) - library(xlsxjars) - library(xlsx, quietly = TRUE) - - wb <- loadWorkbook(file) - sheets <- getSheets(wb) - return(tab %in% names(sheets)) - } - -} # end of load safe guard