comparison excelhlp.R @ 0:e66bb061af06 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author prog
date Tue, 12 Jul 2016 12:02:37 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e66bb061af06
1 if ( ! exists('read.excel')) { # Do not load again if already loaded
2
3 source('strhlp.R')
4 source('dfhlp.R')
5
6 ###############
7 # GET NB ROWS #
8 ###############
9
10 get.nbrows <- function(file, tab) {
11
12 library(rJava)
13 library(xlsxjars)
14 library(xlsx, quietly = TRUE)
15
16 df <- read.xlsx(file, tab)
17 na_rows <- apply(is.na(df), MARGIN = 1, FUN = all) # look for rows that contain only NA values.
18 last_row <- tail(which(! na_rows), n = 1)
19 return(last_row)
20 }
21
22 ##############
23 # READ EXCEL #
24 ##############
25
26 # Read Excel xlsx file
27 # file The path to the Excel file.
28 # sheet
29 # start.row
30 # end.row
31 # header If TRUE, use first line as header line.
32 # check.names If TRUE, correct header (column) names in the data frame, by replacing non-ASCII characters by dot.
33 # stringsAsFactors If TRUE, replace string values by factors.
34 # trim.header If TRUE, remove whitespaces at beginning and of header titles.
35 # trim.values If TRUE, remove whitespaces at beginning and of string values.
36 # remove.na.rows If TRUE, remove all lines that contain only NA values.
37 read.excel <- function(file, sheet, start.row = NULL, end.row = NULL, header = TRUE, remove.na.rows = TRUE, check.names = TRUE, stringsAsFactors = TRUE, trim.header = FALSE, trim.values = FALSE, col.index = NULL) {
38
39 library(rJava)
40 library(xlsxjars)
41 library(xlsx, quietly = TRUE)
42
43 # Check that start row and end row exist
44 if ( ! is.null(start.row) || ! is.null(end.row)) {
45 nb_rows <- get.nbrows(file, sheet)
46 if ( ! is.null(start.row) && start.row > nb_rows)
47 return(NULL)
48 if ( ! is.null(end.row) && end.row > nb_rows)
49 return(NULL)
50 }
51
52 # Call xlsx package
53 df <- read.xlsx(file, sheet, startRow = start.row, endRow = end.row, header = header, check.names = check.names, stringsAsFactors = stringsAsFactors, colIndex = col.index)
54
55 # Remove column default names if header was set to false
56 if ( ! header)
57 colnames(df) <- NULL
58
59 # Clean data frame
60 df <- df.clean(df, trim.colnames = trim.header, trim.values = trim.values, remove.na.rows = remove.na.rows)
61
62 return(df)
63 }
64
65 #######################
66 # CHECK IF TAB EXISTS #
67 #######################
68
69 tab.exists <- function(file, tab) {
70
71 if (is.null(file) || is.na(file) || is.null(tab) || is.na(tab))
72 return(FALSE)
73
74 library(rJava)
75 library(xlsxjars)
76 library(xlsx, quietly = TRUE)
77
78 wb <- loadWorkbook(file)
79 sheets <- getSheets(wb)
80 return(tab %in% names(sheets))
81 }
82
83 } # end of load safe guard