Mercurial > repos > bitlab > plidflow
comparison PLIDflow/scripts/clusterfinder_Auto.R @ 0:6fcfa4756040 draft
Uploaded
| author | bitlab | 
|---|---|
| date | Tue, 14 Jan 2020 06:09:42 -0500 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:6fcfa4756040 | 
|---|---|
| 1 # #clusterfinder_Auto.R makes a file containig the coordenates of the identified cluster/-s by Silhouette criterion | |
| 2 # | |
| 3 #!/usr/bin/env Rscript | |
| 4 args = commandArgs(trailingOnly=TRUE) | |
| 5 | |
| 6 if(length(args) < 1){ | |
| 7 stop("USE: Rscript clusterfinder_Auto.R <session_id>") | |
| 8 } | |
| 9 | |
| 10 #Arguments definition | |
| 11 | |
| 12 session_id <- args[1] | |
| 13 | |
| 14 print(session_id) | |
| 15 | |
| 16 setwd(session_id) | |
| 17 | |
| 18 options(error=traceback) | |
| 19 options(warn=-1) | |
| 20 | |
| 21 #Load required library | |
| 22 library(cluster) | |
| 23 | |
| 24 #Load required libraries | |
| 25 library(clValid) | |
| 26 | |
| 27 #Loading required package: cluster | |
| 28 library(fpc) | |
| 29 | |
| 30 #Install NbClust package | |
| 31 #install.packages("NbClust",dependencies = TRUE) | |
| 32 | |
| 33 #Loading required package: NbClust | |
| 34 library(NbClust) | |
| 35 | |
| 36 | |
| 37 #Read the dataset | |
| 38 data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="") | |
| 39 #data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="") | |
| 40 | |
| 41 | |
| 42 #Run the XXX for selection of the number of clusters | |
| 43 | |
| 44 clustersdata <- NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1) | |
| 45 | |
| 46 #capture.output(clustersdata$Best.nc[1,], file = "bestnumberclusters.txt") | |
| 47 | |
| 48 #capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "proofclusters.txt") | |
| 49 | |
| 50 #capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "numclusters.txt") | |
| 51 | |
| 52 #best_num_cluster <- scan("proofclusters.txt", what = character(), quiet = TRUE) | |
| 53 | |
| 54 k.max <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))]) | |
| 55 | |
| 56 km.res <- kmeans(as.matrix(data_centers), centers = k.max, nstart = 25) | |
| 57 | |
| 58 best_position <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))]) | |
| 59 | |
| 60 # Make clustercoordenates table | |
| 61 #X Coordenate | |
| 62 start_x <- best_position + 1 | |
| 63 end_x <- start_x + (best_position - 1) | |
| 64 | |
| 65 cluster_pos_x <- 1 | |
| 66 x_pos <- c() | |
| 67 for (c in start_x:end_x){ | |
| 68 x_pos[cluster_pos_x] <- km.res$centers[c] | |
| 69 cluster_pos_x <- cluster_pos_x + 1 | |
| 70 } | |
| 71 #print(x_pos) | |
| 72 | |
| 73 #Y Coordenate | |
| 74 start_y <- end_x + 1 | |
| 75 end_y <- start_y + (best_position - 1) | |
| 76 | |
| 77 cluster_pos_y <- 1 | |
| 78 y_pos <- c() | |
| 79 for(cc in start_y:end_y){ | |
| 80 y_pos[cluster_pos_y] <- km.res$centers[cc] | |
| 81 cluster_pos_y <- cluster_pos_y + 1 | |
| 82 } | |
| 83 #print(y_pos) | |
| 84 | |
| 85 #Z Coordenate | |
| 86 start_z <- end_y + 1 | |
| 87 end_z <- start_z + (best_position - 1) | |
| 88 | |
| 89 cluster_pos_z <- 1 | |
| 90 z_pos <- c() | |
| 91 for(ccc in start_z:end_z){ | |
| 92 z_pos[cluster_pos_z] <- km.res$centers[ccc] | |
| 93 cluster_pos_z <- cluster_pos_z + 1 | |
| 94 } | |
| 95 #print(z_pos) | |
| 96 | |
| 97 #Create a file with clusters coordenates. Cluster coordenates are vectors x_pos, y_pos, z_pos | |
| 98 num_filas <- length(x_pos) | |
| 99 clusters_tabla <- matrix(1, nrow = num_filas, ncol = 4) #columns are column 1 number of ccluster, column 2 x-coordenates, colum 3 y-coordenates, column 4 z-coordenates | |
| 100 | |
| 101 ##Add number of cluster located in column 1 | |
| 102 for(i in 1:num_filas){ | |
| 103 clusters_tabla[i,1] <- i | |
| 104 } | |
| 105 | |
| 106 ##Add cluster x,y,z-coordenates in clusters_tabla | |
| 107 #x-coordenates | |
| 108 v_pos_x <- 1 | |
| 109 for(f in 1:num_filas){ | |
| 110 clusters_tabla[f,2] <- x_pos[v_pos_x] | |
| 111 v_pos_x <- v_pos_x + 1 | |
| 112 } | |
| 113 | |
| 114 #y-coordenates | |
| 115 v_pos_y <- 1 | |
| 116 for(ff in 1:num_filas){ | |
| 117 clusters_tabla[ff,3] <- y_pos[v_pos_y] | |
| 118 v_pos_y <- v_pos_y + 1 | |
| 119 } | |
| 120 | |
| 121 #z-coordenates | |
| 122 v_pos_z <- 1 | |
| 123 for(fff in 1:num_filas){ | |
| 124 clusters_tabla[fff,4] <- z_pos[v_pos_z] | |
| 125 v_pos_z <- v_pos_z + 1 | |
| 126 } | |
| 127 | |
| 128 #Write the head for the file which contains the number of clusters annd their coordenates | |
| 129 cabecera_clusterscoordenates <- paste("cluster", sep = " ") | |
| 130 cabecera <- c("x","y","z") | |
| 131 | |
| 132 for(i in 1:3){ | |
| 133 cabecera_clusterscoordenates <- paste(cabecera_clusterscoordenates, cabecera[i], sep=" ") | |
| 134 } | |
| 135 #write(cabecera_clusterscoordenates, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE) | |
| 136 write(cabecera_clusterscoordenates, file="clusterscoordenates.txt", append= TRUE) | |
| 137 | |
| 138 #Write rows containing number of cluster and coordenates | |
| 139 for(i in 1:nrow(clusters_tabla)){ | |
| 140 fila_completa <- paste(clusters_tabla[i,1], sep = " ") | |
| 141 for(j in 2:ncol(clusters_tabla)){ | |
| 142 fila_completa <- paste(fila_completa, clusters_tabla[i,j], sep=" ") | |
| 143 } | |
| 144 write(fila_completa, file="clusterscoordenates.txt", append= TRUE) | |
| 145 #write(fila_completa, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE) | |
| 146 } | |
| 147 | |
| 148 | |
| 149 #Table to see Binding Sited finded in Galaxy screen | |
| 150 #BS_screen <- scan("/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", what = character(), quiet = TRUE) | |
| 151 BS_screen <- scan("clusterscoordenates.txt", what = character(), quiet = TRUE) | |
| 152 | |
| 153 BS_table <- matrix(1, nrow = nrow(clusters_tabla), ncol = ncol(clusters_tabla)) | |
| 154 | |
| 155 bs_count <- 5 | |
| 156 for(i in 1:nrow(clusters_tabla)){ | |
| 157 for(j in 1:ncol(clusters_tabla)){ | |
| 158 BS_table[i,j] <- BS_screen[bs_count] | |
| 159 bs_count <- bs_count +1 | |
| 160 } | |
| 161 } | |
| 162 | |
| 163 colnames(BS_table) <- c("Binding Site", "X", "Y", "Z") | |
| 164 print(BS_table) | |
| 165 | |
| 166 | |
| 167 | |
| 168 | |
| 169 | |
| 170 | 
