Mercurial > repos > nicolas > oghma
comparison prediction.R @ 22:f0d89ff35ad2 draft
Uploaded
| author | nicolas |
|---|---|
| date | Fri, 21 Oct 2016 10:35:13 -0400 |
| parents | |
| children | 8112bc642858 |
comparison
equal
deleted
inserted
replaced
| 21:1efd84f03444 | 22:f0d89ff35ad2 |
|---|---|
| 1 ######################################################## | |
| 2 # | |
| 3 # creation date : 26/01/16 | |
| 4 # last modification : 02/06/16 | |
| 5 # author : Dr Nicolas Beaume | |
| 6 # owner : IRRI | |
| 7 # | |
| 8 ######################################################## | |
| 9 log <- file(paste(getwd(), "log_prediction.txt", sep="/"), open = "wt") | |
| 10 sink(file = log, type="message") | |
| 11 | |
| 12 library(rrBLUP) | |
| 13 library(randomForest) | |
| 14 library(e1071) | |
| 15 library(glmnet) | |
| 16 library(methods) | |
| 17 ############################ helper functions ####################### | |
| 18 | |
| 19 ################################## main function ########################### | |
| 20 | |
| 21 | |
| 22 ############################ main ############################# | |
| 23 # running from terminal (supposing the OghmaGalaxy/bin directory is in your path) : | |
| 24 # predict.sh -i genotype_file -m model_file -n name -o path_to_result_directory | |
| 25 ## -i : path to the file that contains the genotypes. | |
| 26 # please note that the table must be called "genotype" when your datafile is saved into .rda (automatic if encode.R is used) | |
| 27 | |
| 28 ## -m : model build through any methods | |
| 29 # please note that the table must be called "model" when your datafile is saved into .rda | |
| 30 # (automatic if classifiers from this pipeline were used) | |
| 31 | |
| 32 ## -n : prefix of the names of all result files | |
| 33 | |
| 34 ## -o : path to the directory where the evaluation results are stored. | |
| 35 | |
| 36 classifierNames <- c("list", "randomForest", "svm", "glmnet") | |
| 37 cmd <- commandArgs(trailingOnly = T) | |
| 38 source(cmd[1]) | |
| 39 # load data | |
| 40 con = file(genotype) | |
| 41 genotype <- readLines(con = con, n = 1, ok=T) | |
| 42 close(con) | |
| 43 genotype <- read.table(genotype, sep="\t", h=T) | |
| 44 con = file(model) | |
| 45 model <- readLines(con = con, n = 1, ok=T) | |
| 46 close(con) | |
| 47 model <- readRDS(model) | |
| 48 # check if the classifier name is valid | |
| 49 if(all(is.na(match(class(model), classifierNames)))) { | |
| 50 stop(paste(class(model), "is not recognized as a valid model. Valid models are : ", classifierNames)) | |
| 51 } | |
| 52 # run prediction according to the classifier | |
| 53 if(any(class(model) == "list")) { | |
| 54 predictions <- as.matrix(genotype) %*% as.matrix(model$u) | |
| 55 predictions <- predictions[,1]+model$beta | |
| 56 predictions <- data.frame(lines=rownames(genotype), predictions=predictions) | |
| 57 } else if(any(class(model) == "glmnet")) { | |
| 58 predictions <- predict(model, as.matrix(genotype), type = "response") | |
| 59 predictions <- data.frame(lines=rownames(genotype), predictions=predictions) | |
| 60 } else { | |
| 61 predictions <- predict(model, genotype) | |
| 62 predictions <- data.frame(lines=names(predictions), predictions=predictions) | |
| 63 } | |
| 64 # save results | |
| 65 write.table(predictions, file=out, sep="\t", row.names = F) | |
| 66 |
