annotate checkformat_script.R @ 1:e194eec8e70c draft

planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
author ethevenot
date Sat, 06 Aug 2016 11:54:28 -0400
parents
children 80a38d36f946
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
1 ## Etienne Thevenot
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
2 ## CEA, MetaboHUB Paris
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
3 ## etienne.thevenot@cea.fr
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
4
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
5
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
6 ## Reads the dataMatrix, sampleMetadata, and variableMetadata .tsv files
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
7 ## and checks their formats (unique row/column names; sample and variable names consistent across the tables)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
8 readAndCheckF <- function(datFilC="dataMatrix.tsv",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
9 samFilC="sampleMetadata.tsv",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
10 varFilC="variableMetadata.tsv") {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
11
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
12 ## options
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
13
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
14 optStrAsFacL <- options()[["stringsAsFactors"]]
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
15 options(stringsAsFactors = FALSE)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
16
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
17 ## checking that the tables have no duplicated row or column names
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
18
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
19 for(tabC in c("dat", "sam", "var")) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
20
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
21 tabNamC <- switch(tabC, dat="dataMatrix", sam="sampleMetadata", var="variableMetadata")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
22
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
23 rowVc <- read.table(eval(parse(text=paste0(tabC, "FilC"))),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
24 check.names = FALSE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
25 header = TRUE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
26 sep = "\t")[, 1]
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
27
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
28 colVc <- unlist(read.table(eval(parse(text=paste0(tabC, "FilC"))),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
29 check.names = FALSE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
30 nrow=1,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
31 sep = "\t"))[-1]
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
32
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
33 if(any(duplicated(rowVc)))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
34 stop("The following row name(s) is/are duplicated in the ",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
35 tabNamC,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
36 " table: '",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
37 paste(rowVc[duplicated(rowVc)], collapse="', '"), "'",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
38 call.=FALSE)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
39
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
40 if(any(duplicated(colVc)))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
41 stop("The following column name(s) is/are duplicated in the ",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
42 tabNamC,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
43 " table: '",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
44 paste(colVc[duplicated(colVc)], collapse="', '"), "'",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
45 call.=FALSE)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
46
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
47 rowMakVc <- make.names(rowVc, unique = TRUE)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
48
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
49 rowDifVl <- rowVc != rowMakVc
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
50
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
51 if(any(rowDifVl)) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
52 rowDifDF <- data.frame(row = 1:length(rowVc),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
53 actual = rowVc,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
54 preferred = rowMakVc)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
55 rowDifDF <- rowDifDF[rowDifVl, , drop = FALSE]
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
56 cat("\n\nWarning: The following row names of the ",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
57 tabNamC,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
58 " table are not in the standard R format, which may result in errors when loading the data in some of the W4M modules:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
59 print(rowDifDF)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
60 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
61
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
62 colMakVc <- make.names(colVc, unique = TRUE)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
63
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
64 colDifVl <- colVc != colMakVc
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
65
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
66 if(any(colDifVl)) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
67 colDifDF <- data.frame(col = 1:length(colVc),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
68 actual = colVc,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
69 preferred = colMakVc)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
70 colDifDF <- colDifDF[colDifVl, , drop = FALSE]
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
71 cat("\n\nWarning: The following column names of the ",
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
72 tabNamC,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
73 " table are not in the standard R format, which may result in errors when loading the data in some of the W4M modules:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
74 print(colDifDF)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
75 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
76 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
77
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
78 ## reading tables
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
79
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
80 datMN <- t(as.matrix(read.table(datFilC,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
81 check.names = FALSE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
82 header = TRUE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
83 row.names = 1,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
84 sep = "\t")))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
85
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
86 samDF <- read.table(samFilC,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
87 check.names = FALSE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
88 header = TRUE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
89 row.names = 1,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
90 sep = "\t")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
91
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
92 varDF <- read.table(varFilC,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
93 check.names = FALSE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
94 header = TRUE,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
95 row.names = 1,
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
96 sep = "\t")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
97
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
98 ## checking formats
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
99
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
100 chkL <- TRUE
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
101
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
102 if(!identical(rownames(datMN), rownames(samDF))) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
103 ## checking sample names
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
104
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
105 chkL <- FALSE
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
106
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
107 datSamDifVc <- setdiff(rownames(datMN), rownames(samDF))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
108
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
109 if(length(datSamDifVc)) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
110 cat("\nThe following samples were found in the dataMatrix column names but not in the sampleMetadata row names:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
111 print(cbind.data.frame(col = as.numeric(sapply(datSamDifVc, function(samC) which(rownames(datMN) == samC))),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
112 name = datSamDifVc))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
113 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
114
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
115 samDatDifVc <- setdiff(rownames(samDF), rownames(datMN))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
116
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
117 if(length(samDatDifVc)) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
118 cat("\n\nThe following samples were found in the sampleMetadata row names but not in the dataMatrix column names:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
119 print(cbind.data.frame(row = as.numeric(sapply(samDatDifVc, function(samC) which(rownames(samDF) == samC))),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
120 name = samDatDifVc))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
121 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
122
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
123 if(nrow(datMN) != nrow(samDF)) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
124 cat("\n\nThe dataMatrix has ", nrow(datMN), " columns (ie samples) whereas the sampleMetadata has ", nrow(samDF), " rows\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
125 } else if(identical(gsub("^X", "", rownames(datMN)), rownames(samDF))) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
126 cat("\n\nThe dataMatrix column names start with an 'X' but not the sampleMetadata row names\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
127 } else if(identical(gsub("^X", "", rownames(samDF)), rownames(datMN))) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
128 cat("\n\nThe sampleMetadata row names start with an 'X' but not the dataMatrix column names\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
129 } else if(identical(sort(rownames(datMN)), sort(rownames(samDF)))) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
130 cat("\n\nThe dataMatrix column names and the sampleMetadata row names are not in the same order:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
131 print(cbind.data.frame(indice = 1:nrow(datMN),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
132 dataMatrix_columnnames=rownames(datMN),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
133 sampleMetadata_rownames=rownames(samDF))[rownames(datMN) != rownames(samDF), , drop = FALSE])
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
134 } else {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
135 cat("\n\nThe dataMatrix column names and the sampleMetadata row names are not identical:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
136 print(cbind.data.frame(indice = 1:nrow(datMN),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
137 dataMatrix_columnnames=rownames(datMN),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
138 sampleMetadata_rownames=rownames(samDF))[rownames(datMN) != rownames(samDF), , drop = FALSE])
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
139 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
140
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
141 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
142
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
143 if(!identical(colnames(datMN), rownames(varDF))) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
144 ## checking variable names
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
145
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
146 chkL <- FALSE
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
147
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
148 datVarDifVc <- setdiff(colnames(datMN), rownames(varDF))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
149
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
150 if(length(datVarDifVc)) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
151 cat("\nThe following variables were found in the dataMatrix row names but not in the variableMetadata row names:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
152 print(cbind.data.frame(row = as.numeric(sapply(datVarDifVc, function(varC) which(colnames(datMN) == varC))),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
153 name = datVarDifVc))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
154
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
155 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
156
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
157 varDatDifVc <- setdiff(rownames(varDF), colnames(datMN))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
158
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
159 if(length(varDatDifVc)) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
160 cat("\n\nThe following variables were found in the variableMetadata row names but not in the dataMatrix row names:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
161 print(cbind.data.frame(row = as.numeric(sapply(varDatDifVc, function(varC) which(rownames(varDF) == varC))),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
162 name = varDatDifVc))
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
163 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
164
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
165 if(ncol(datMN) != nrow(varDF)) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
166 cat("\n\nThe dataMatrix has ", nrow(datMN), " rows (ie variables) whereas the variableMetadata has ", nrow(varDF), " rows\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
167 } else if(identical(sort(colnames(datMN)), sort(rownames(varDF)))) {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
168 cat("\n\nThe dataMatrix row names and the variableMetadata row names are not in the same order:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
169 print(cbind.data.frame(row = 1:ncol(datMN),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
170 dataMatrix_rownames=colnames(datMN),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
171 variableMetadata_rownames=rownames(varDF))[colnames(datMN) != rownames(varDF), , drop = FALSE])
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
172 } else {
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
173 cat("\n\nThe dataMatrix row names and the variableMetadata row names are not identical:\n", sep="")
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
174 print(cbind.data.frame(row = 1:ncol(datMN),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
175 dataMatrix_rownames=colnames(datMN),
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
176 variableMetadata_rownames=rownames(varDF))[colnames(datMN) != rownames(varDF), , drop = FALSE])
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
177 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
178 }
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
179
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
180 options(stringsAsFactors=optStrAsFacL)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
181
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
182 resLs <- list(chkL=chkL)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
183
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
184 return(resLs)
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
185
e194eec8e70c planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 8ebfbfa8d9449c9bbfbf569851a30b1e33df0b3f
ethevenot
parents:
diff changeset
186 } ## end of checkAndReadF