Mercurial > repos > prog > lcmsmatching
comparison dfhlp.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author | prog |
---|---|
date | Fri, 22 Feb 2019 16:04:22 -0500 |
parents | fb9c0409d85c |
children |
comparison
equal
deleted
inserted
replaced
5:fb9c0409d85c | 6:f86fec07f392 |
---|---|
1 if ( ! exists('remove.na.rows')) { # Do not load again if already loaded | |
2 | |
3 source('strhlp.R') | |
4 | |
5 ################# | |
6 # RENAME COLUMN # | |
7 ################# | |
8 | |
# Rename columns of a data frame.
# df    The data frame whose columns are to be renamed.
# cur   Vector of current column names.
# new   Vector of replacement names; new[k] replaces cur[k].
# Returns the data frame with the matching columns renamed.
#
# Renames are applied one after the other, so a later entry of `cur` may
# match a name set by an earlier rename. A name in `cur` that matches no
# column, or more than one column, is left untouched. Entries of `cur`
# that have no usable replacement (`new` too short, or NA) are skipped
# instead of writing NA into the column names.
rename.col <- function(df, cur, new) {

  for (k in seq_along(cur)) {

    # No replacement name available for this entry: nothing to do.
    if (k > length(new) || is.na(new[k]))
      next

    i <- which(cur[k] == colnames(df))

    # Only rename when the current name identifies exactly one column.
    if (length(i) == 1)
      colnames(df)[i] <- new[k]
  }

  return(df)
}
19 | |
20 ################## | |
21 # REMOVE NA ROWS # | |
22 ################## | |
23 | |
# Drop the rows of a data frame in which every value is NA.
# df    The data frame to filter.
# Returns a data frame holding only the rows that have at least one
# non-NA value. The result stays a data frame even with a single column.
remove.na.rows <- function(df) {
  # A row is "all NA" when its NA count equals the number of columns.
  na.count <- rowSums(is.na(df))
  keep <- na.count != ncol(df)
  df[keep, , drop = FALSE]
}
28 | |
29 ###################### | |
30 # MOVE COLUMNS FIRST # | |
31 ###################### | |
32 | |
# Reorder a data frame so that the given columns come first.
# df    The data frame to reorder.
# cols  Names of the columns to place at the beginning, in this order.
# Returns the data frame with `cols` first, followed by the remaining
# columns in their original order.
df.move.col.first <- function(df, cols) {
  remaining <- setdiff(names(df), cols)
  new.order <- c(cols, remaining)
  df[new.order]
}
37 | |
38 ##################### | |
39 # MOVE COLUMNS LAST # | |
40 ##################### | |
41 | |
# Reorder a data frame so that the given columns come last.
# df    The data frame to reorder.
# cols  Names of the columns to place at the end, in this order.
# Returns the data frame with the remaining columns first (in their
# original order), followed by `cols`.
df.move.col.last <- function(df, cols) {
  remaining <- setdiff(names(df), cols)
  new.order <- c(remaining, cols)
  df[new.order]
}
46 | |
47 ################# | |
48 # READ CSV FILE # | |
49 ################# | |
50 | |
# Read a CSV file into a data frame, then clean it with df.clean().
# file              The path to the CSV file.
# header            If TRUE, use the first line as the header line.
# remove.na.rows    If TRUE, remove all rows that contain only NA values.
# check.names       Passed to read.csv(): if TRUE, header (column) names are
#                   adjusted so that they are syntactically valid R names.
# stringsAsFactors  If TRUE, string columns are converted to factors.
# trim.header       If TRUE, remove whitespace at the beginning and end of
#                   header titles.
# trim.values       If TRUE, remove whitespace at the beginning and end of
#                   string values.
# Returns the cleaned data frame.
df.read.csv <- function(file, header = TRUE, remove.na.rows = TRUE, check.names = TRUE, stringsAsFactors = TRUE, trim.header = FALSE, trim.values = FALSE) {

  # Parsing is delegated to the built-in reader.
  raw <- read.csv(
    file,
    header = header,
    check.names = check.names,
    stringsAsFactors = stringsAsFactors
  )

  # Post-processing (NA rows, header and value trimming) is done by df.clean().
  df.clean(
    raw,
    trim.colnames = trim.header,
    trim.values = trim.values,
    remove.na.rows = remove.na.rows
  )
}
69 | |
70 ################## | |
71 # WRITE TSV FILE # | |
72 ################## | |
73 | |
# Write a data frame to a tab-separated file.
# df         The data frame to write.
# file       The path (or connection) to write to.
# row.names  Passed to write.table(); FALSE by default, so no row names
#            are written.
# col.names  Passed to write.table(); TRUE by default, so a header line
#            is written.
# All other write.table() settings are left at their defaults.
df.write.tsv <- function(df, file, row.names = FALSE, col.names = TRUE) {
  write.table(
    df,
    file = file,
    sep = "\t",
    row.names = row.names,
    col.names = col.names
  )
}
77 | |
78 #################### | |
79 # CLEAN DATA FRAME # | |
80 #################### | |
81 | |
# Clean a data frame: optionally drop all-NA rows, trim column names and
# trim character values.
# df              The data frame to clean.
# trim.colnames   If TRUE, pass the column names through trim().
# trim.values     If TRUE, pass every character column through trim().
# remove.na.rows  If TRUE, remove the rows that contain only NA values.
# Returns the cleaned data frame.
#
# NOTE(review): trim() is not defined in this file; presumably it comes
# from strhlp.R, which this file sources -- confirm.
df.clean <- function(df, trim.colnames = FALSE, trim.values = FALSE, remove.na.rows = FALSE) {

  # Remove NA lines.
  # The logical argument shares its name with the remove.na.rows() function;
  # the call still works because R skips non-function objects when it looks
  # up a name used in call position.
  if (remove.na.rows)
    df <- remove.na.rows(df)

  # Trim header
  if (trim.colnames)
    colnames(df) <- trim(colnames(df))

  # Trim values.
  # seq_len() yields an empty sequence for a data frame with no columns,
  # whereas 1:ncol(df) would iterate over c(1, 0) and fail on df[[1]].
  if (trim.values)
    for (j in seq_len(ncol(df)))
      if (typeof(df[[j]]) == 'character')
        df[[j]] <- trim(df[[j]])

  return(df)
}
100 | |
101 } # end of load safe guard |