Galaxy |

Changeset 31:4f017b111d6c (2016-10-25)

Previous changeset 30:e85041eeda80 (2016-10-25) Next changeset 32:9230fde1d48e (2016-10-25)

Commit message:
Uploaded

modified:
evaluation.R

diff -r e85041eeda80 -r 4f017b111d6c evaluation.R
--- a/evaluation.R Tue Oct 25 14:39:13 2016 -0400
+++ b/evaluation.R Tue Oct 25 14:39:30 2016 -0400

[

@@ -8,30 +8,14 @@
#
########################################################

-log <- file(paste(getwd(), "log_evaluation.txt", sep="/"), open = "wt")
-sink(file = log, type="message")
-
-library("pROC")
-library("randomForest")
-library("miscTools")
-
+# compute correlation
predictionCorrelation <- function(prediction, obs) {
   return(cor(prediction, obs, method = "pearson"))
}

-r2.plot <- function(true, predicted, scatterplot=T) {
-  if(scatterplot) {
-    plot(true, predicted, xlab="trait value", ylab="predicted value", main="", pch=16,
-         ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))))
-    lines(true, true, col="red")
-  } else {
-    plot(true, ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))),
-         xlab="individual index", ylab="traitValue", type="l", main="")
-    lines(predicted, col="red")
-    legend("topright", legend = c("pedicted", "traitValue"), col = c("red", "black"), lty=c(1,1))
-  }
-}
-
+# compute r2 by computing the classic formula
+# compare the sum of square difference from target to prediciton
+# to the sum of square difference from target to the mean of the target
r2 <- function(target, prediction) {
   sst <- sum((target-mean(target))^2)
   ssr <- sum((target-prediction)^2)
@@ -41,8 +25,9 @@
################################## main function ###########################

evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F,
-                                prefix="data", folds=NULL, classes=NULL, doCor=F, path=".") {
+                                folds=NULL, classes=NULL, doCor=F) {
   eval <- NULL
+  # case of r2
   if(doR2) {
     eval <- c(eval, list(r2=NULL))
   }
@@ -54,14 +39,12 @@
     test <- folds[[i]]
     if(doR2) {
       if(!is.null(traitValue)) {
-        jpeg(paste(path, "/", prefix,"fold_",i,"_scatterPlot.jpeg", sep=""))
         eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]]))
-        r2.plot(traitValue[test], prediction[[i]])
-        dev.off()
       } else {
         eval$r2 <- c(eval$r2, NA)
       }
     }
+    # case of correlation
     if(doCor) {
       if(!is.null(traitValue)) {
         eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]]))
@@ -70,31 +53,18 @@
       }
     }
   }
+  # print output
   print(eval)
-  write.table(eval, file = paste(path,"/",prefix,"_evaluation.csv", sep=""), sep="\t", row.names = F)
+  if(doR2) {
+    print(paste("mean r2 : ", mean(eval$r2), "standard deviation :", sd(eval$r2)))
+  }
+  if(doCor) {
+    print(paste("mean correlation : ", mean(eval$cor), "standard deviation :", sd(eval$cor)))
+  }
}
############################ main #############################
-# evaluation.sh -i path_to_data -p prediction -f folds -t phenotype -r -c -a -n name -o path_to_result_directory
-## -i : path to the file that contains the genotypes.
-# please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encode.R is used)

-## -p : prediction made through any methods
-# please note that the table must be called "prediction" when your datafile is saved into .rda
-# (automatic if prediction methods from this pipeline were used)
-
-## -f : index of the folds to which belong each individual
-# please note that the list must be called "folds" (automatic if folds.R was used)
-
-## -t : phenotype of each individual
-# please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
-
-## -r : flag to run a R2 evaluation
-
-## -c : flag to run a correlation evaluation
-
-## -n : prefix of the names of all result files
-
-## -o : path to the directory where the evaluation results are stored.
+# load arguments
cmd <- commandArgs(trailingOnly = T)
source(cmd[1])
# set which evaluation are used
@@ -120,5 +90,6 @@
folds <- readLines(con = con, n = 1, ok=T)
close(con)
folds <- readRDS(folds)
+# evaluation
evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2,
-                    prefix=name, folds=folds, doCor=doCor, path=out)
\ No newline at end of file
+                    folds=folds, doCor=doCor)
\ No newline at end of file