annotate IDchoice/IDchoice_script.R @ 1:bb19b1d15732 draft default tip

Uploaded
author melpetera
date Thu, 19 Dec 2019 05:29:57 -0500
parents b7a6a88f518a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
1 ################################################################################################
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
2 # ID CHOICE #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
3 # #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
4 # User: Galaxy #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
5 # Starting date: 01-06-2017 #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
6 # V-0.1: First version of code #
1
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
7 # V-1.0: Code adjusted to user feedback #
0
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
8 # #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
9 # #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
10 # Input files: dataMatrix ; Metadata file #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
11 # Output files: dataMatrix ; Metadata file #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
12 # #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
13 # Dependencies: RcheckLibrary.R ; miniTools.R (easyRlibrary) #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
14 # #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
15 ################################################################################################
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
16
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
17 # Parameters (for dev)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
18 if(FALSE){
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
19 DM.name <- "CaracSpe_dataMatrix.txt"
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
20 meta.name <- "CaracSpe_variableMetadata.txt"
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
21 metype <- "variable"
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
22 #coloname <- "namecustom"
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
23 coloname <- "B"
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
24 makeun <- "yes"
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
25 DMout <- "ID_DM.txt"
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
26 metaout <- paste0("ID_",metype,"meta.txt")
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
27 }
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
28
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
29
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
30 id.choice <- function(DM.name,meta.name,metype,coloname,makeun,DMout,metaout){
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
31 # This function allows to replace original IDs with other ones from one metadata table.
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
32 #
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
33 # Parameters:
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
34 # - DM.name, meta.name: dataMatrix and metadata files' access respectively
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
35 # - metype: "sample" or "variable" depending on metadata content
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
36 # - coloname: name of the metadata column to be used as new ID
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
37 # - makeun: "yes" or "no" depending on user choice if new IDs are not unique ("yes"=conversion to unique ID)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
38 # - DMout, metaout: output files' access
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
39
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
40
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
41 # Input --------------------------------------------------------------
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
42
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
43 DM <- read.table(DM.name,header=TRUE,sep="\t",check.names=FALSE)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
44 meta <- read.table(meta.name,header=TRUE,sep="\t",check.names=FALSE,colClasses="character")
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
45
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
46 # Table match check
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
47 table.check <- match2(DM,meta,metype)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
48 check.err(table.check)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
49
1
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
50 # Keep metadata original order tracked ----------------------------------------
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
51
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
52 meta <- data.frame(meta,ori=1:nrow(meta))
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
53
0
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
54
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
55 # Checking unicity of new IDs ----------------------------------------
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
56
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
57 numcol <- which(colnames(meta)==coloname)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
58 if(length(numcol)==0) {
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
59 stop(paste0("\n-------\nWarning: no '",coloname,"' column detected in ",metype," metadata!",
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
60 "\nPlease check your metadata file (column names are case-sensitive).\n-------\n"))
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
61 }
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
62
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
63 unicity <- duplicated(meta[,numcol])
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
64
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
65 if(sum(unicity)>0){
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
66 if(makeun=="no"){
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
67 #Sending back an explicit error
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
68 duptable <- t(t(table(meta[,numcol][unicity])+1))
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
69 stop(paste0("\n-------\nYour '",coloname,"' column contains duplicates:\n"),
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
70 paste(rownames(duptable),duptable,sep=": ",collapse="\n"),paste0("\nSince identifiers are meant to be unique, ",
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
71 "please check your data or use the 'Force unicity' option to force unicity.\n-------\n"))
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
72
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
73 }else{
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
74 #Making unique names
1
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
75 meta <- cbind(meta,newID=make.unique(meta[,numcol],sep="_"))
0
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
76 }
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
77 }else{
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
78 #No unicity problem
1
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
79 meta <- cbind(meta,newID=meta[,numcol])
0
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
80 }
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
81
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
82
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
83 # Merging tables -----------------------------------------------------
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
84
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
85 #Transposing the dataMatrix if necessary
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
86 if(metype=="sample"){
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
87 rownames(DM) <- DM[,1]
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
88 DM <- DM[,-1]
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
89 DM <- t(DM)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
90 DM <- data.frame(sample=row.names(DM),DM,check.names=FALSE)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
91 rownames(DM) <- NULL
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
92 }
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
93
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
94 comb.data <- merge(x=meta,y=DM,by.x=1,by.y=1)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
95 comb.data <- comb.data[order(comb.data$ori),]
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
96
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
97
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
98 # Changing IDs -------------------------------------------------------
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
99
1
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
100 DM <- comb.data[,-c(1:(ncol(meta)-1))]
0
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
101 if(makeun=="no"){
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
102 comb.data <- comb.data[,c(numcol,which(colnames(meta)!=coloname))]
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
103 meta <- comb.data[,c(1:(ncol(meta)-2))]
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
104 }else{
1
bb19b1d15732 Uploaded
melpetera
parents: 0
diff changeset
105 meta <- comb.data[,c(ncol(meta),1:(ncol(meta)-2))]
0
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
106 }
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
107
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
108 #Transposing back the dataMatrix if necessary
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
109 if(metype=="sample"){
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
110 rownames(DM) <- DM[,1]
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
111 DM <- DM[,-1]
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
112 DM <- t(DM)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
113 DM <- data.frame(sample=row.names(DM),DM,check.names=FALSE)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
114 rownames(DM) <- NULL
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
115 }
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
116
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
117
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
118 # Output -------------------------------------------------------------
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
119
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
120 # Writing the table
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
121 write.table(DM,DMout,sep="\t",quote=FALSE,row.names=FALSE)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
122 write.table(meta,metaout,sep="\t",quote=FALSE,row.names=FALSE)
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
123
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
124
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
125 } # End of id.choice
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
126
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
127
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
128 # Typical function call
b7a6a88f518a Uploaded
melpetera
parents:
diff changeset
129 # id.choice(DM.name,meta.name,metype,coloname,makeun,DMout,metaout)