comparison dfhlp.R @ 0:e66bb061af06 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author prog
date Tue, 12 Jul 2016 12:02:37 -0400
parents
children fb9c0409d85c
comparison
equal deleted inserted replaced
-1:000000000000 0:e66bb061af06
1 if ( ! exists('remove.na.rows')) { # Do not load again if already loaded
2
3 source('strhlp.R')
4
5 #################
6 # RENAME COLUMN #
7 #################
8
# Rename columns of a data frame.
# df    The data frame whose columns must be renamed.
# cur   Vector of current column names.
# new   Vector of replacement names; new[k] replaces cur[k].
# Returns df with the matched columns renamed. A name of cur that matches no
# column, or more than one column, is left untouched.
rename.col <- function(df, cur, new) {

    # seq_along() always walks the positions of cur, whereas seq(cur) expands
    # a single number n into 1:n.
    for (k in seq_along(cur)) {
        i <- which(cur[k] == colnames(df))
        if (length(i) == 1) {
            # Fail explicitly instead of silently writing an NA column name
            # when new is shorter than cur.
            if (k > length(new))
                stop("rename.col: no new name supplied for column '", cur[k], "'.")
            colnames(df)[i] <- new[k]
        }
    }

    return(df)
}
19
20 ##################
21 # REMOVE NA ROWS #
22 ##################
23
# Drop every row whose cells are all NA.
# df    A data frame.
# Returns df restricted to the rows that hold at least one non-NA value; the
# result keeps its two dimensions even when a single column is left.
remove.na.rows <- function(df) {
    na.mask <- is.na(df)
    # A row is empty when its NA count equals the number of columns.
    empty.row <- rowSums(na.mask) == ncol(na.mask)
    df[ ! empty.row, , drop = FALSE]
}
28
29 ######################
30 # MOVE COLUMNS FIRST #
31 ######################
32
# Reorder the columns of a data frame so that the given columns come first.
# df    A data frame.
# cols  Names of the columns to put at the beginning, in the wanted order.
# Returns df with cols first, followed by the remaining columns in their
# current order.
df.move.col.first <- function(df, cols) {
    remaining <- setdiff(names(df), cols)
    new.order <- c(cols, remaining)
    return(df[new.order])
}
37
38 #####################
39 # MOVE COLUMNS LAST #
40 #####################
41
# Reorder the columns of a data frame so that the given columns come last.
# df    A data frame.
# cols  Names of the columns to put at the end, in the wanted order.
# Returns df with the remaining columns in their current order, followed by
# cols.
df.move.col.last <- function(df, cols) {
    remaining <- setdiff(names(df), cols)
    new.order <- c(remaining, cols)
    return(df[new.order])
}
46
47 ##############
48 # READ TABLE #
49 ##############
50
# Read a table file with read.table() and return a cleaned data frame.
# file              The file to read, forwarded to read.table().
# sep               The field separator, forwarded to read.table().
# header            If TRUE, use first line as header line.
# remove.na.rows    If TRUE, remove all lines that contain only NA values.
# check.names       Forwarded to read.table().
# stringsAsFactors  Forwarded to read.table().
# trim.header       Forwarded to df.clean() as trim.colnames.
# trim.values       Forwarded to df.clean() as trim.values.
# fileEncoding      Forwarded to read.table().
df.read.table <- function(file, sep = "", header = TRUE, remove.na.rows = TRUE, check.names = TRUE, stringsAsFactors = TRUE, trim.header = FALSE, trim.values = FALSE, fileEncoding = "") {

    # Load the raw content with the built-in reader
    raw <- read.table(file, sep = sep, header = header, check.names = check.names, stringsAsFactors = stringsAsFactors, fileEncoding = fileEncoding)

    # Hand the result over to the common cleaning step
    cleaned <- df.clean(raw, trim.colnames = trim.header, trim.values = trim.values, remove.na.rows = remove.na.rows)

    return(cleaned)
}
61
62 #################
63 # READ CSV FILE #
64 #################
65
# Read a CSV file with read.csv() and return a cleaned data frame.
# file              The path to the CSV file.
# header            If TRUE, use first line as header line.
# remove.na.rows    If TRUE, remove all lines that contain only NA values.
# check.names       Forwarded to read.csv(): if TRUE, header (column) names are adjusted so that they are syntactically valid R names.
# stringsAsFactors  If TRUE, replace string values by factors.
# trim.header       If TRUE, remove whitespaces at the beginning and end of header titles.
# trim.values       If TRUE, remove whitespaces at the beginning and end of string values.
df.read.csv <- function(file, header = TRUE, remove.na.rows = TRUE, check.names = TRUE, stringsAsFactors = TRUE, trim.header = FALSE, trim.values = FALSE) {

    # Load the raw content with the built-in reader
    raw <- read.csv(file, header = header, check.names = check.names, stringsAsFactors = stringsAsFactors)

    # Hand the result over to the common cleaning step
    cleaned <- df.clean(raw, trim.colnames = trim.header, trim.values = trim.values, remove.na.rows = remove.na.rows)

    return(cleaned)
}
84
85 ##################
86 # WRITE TSV FILE #
87 ##################
88
# Write a data frame to a file as tab-separated values.
# df         The data frame to write.
# file       The destination, forwarded to write.table().
# row.names  Forwarded to write.table(); row names are not written by default.
# col.names  Forwarded to write.table(); the header line is written by default.
# All other write.table() options keep their default values.
df.write.tsv <- function(df, file, row.names = FALSE, col.names = TRUE) {
    field.sep <- "\t"
    write.table(df, file = file, sep = field.sep, row.names = row.names, col.names = col.names)
}
92
93 ####################
94 # CLEAN DATA FRAME #
95 ####################
96
# Apply optional cleaning steps to a data frame.
# df              The data frame to clean.
# trim.colnames   If TRUE, pass the column names through trim().
# trim.values     If TRUE, pass the values of character columns through trim().
# remove.na.rows  If TRUE, remove all lines that contain only NA values.
# Returns the cleaned data frame.
# NOTE(review): trim() is not defined in this file; presumably it comes from
# strhlp.R, sourced at the top - confirm.
df.clean <- function(df, trim.colnames = FALSE, trim.values = FALSE, remove.na.rows = FALSE) {

    # Remove NA lines. The flag shares its name with the remove.na.rows()
    # function; R ignores non-function objects when resolving a call, so the
    # call below still reaches the function.
    if (remove.na.rows)
        df <- remove.na.rows(df)

    # Trim header
    if (trim.colnames)
        colnames(df) <- trim(colnames(df))

    # Trim values. seq_len() yields an empty sequence for a data frame without
    # columns, whereas 1:ncol(df) would iterate over c(1, 0) and fail on df[[1]].
    # Factor columns have type 'integer' and are therefore left untouched.
    if (trim.values)
        for (j in seq_len(ncol(df)))
            if (typeof(df[[j]]) == 'character')
                df[[j]] <- trim(df[[j]])

    return(df)
}
115
116 } # end of load safe guard