Galaxy |

Changeset 2:4fa4d5c8036a (2012-08-20)

Previous changeset 1:dcf98c713e4a (2012-08-20) Next changeset 3:cd35ace22905 (2012-08-20)

Commit message:
Uploaded

added:
kmersvm/.gitignore
kmersvm/kmersvm.tar.gz

removed:
kmersvm/rocprcurve.xml

diff -r dcf98c713e4a -r 4fa4d5c8036a kmersvm/.gitignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/.gitignore Mon Aug 20 21:33:00 2012 -0400

@@ -0,0 +1,1 @@
+scripts/libkmersvm.pyc

diff -r dcf98c713e4a -r 4fa4d5c8036a kmersvm/kmersvm.tar.gz

Binary file kmersvm/kmersvm.tar.gz has changed

diff -r dcf98c713e4a -r 4fa4d5c8036a kmersvm/rocprcurve.xml
--- a/kmersvm/rocprcurve.xml Mon Aug 20 20:04:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,147 +0,0 @@
-<tool id="ROC-PR Curve" name="ROC-PR Curve">
- <description>calculates AUC for ROC and PR curves</description>
- <command interpreter="sh">r_wrapper.sh $script_file</command>
- <inputs>
- <param format="tabular" name="cvpred_data" type="data" label="CV Predictions"/>
- </inputs>
- <outputs>
- 
- <data format="png" name="rocprc.png" from_work_dir="rocprc.png" />
- </outputs>
-
- <configfiles>
- <configfile name="script_file">
-
- rm(list = objects() )
-
- ########## calculate auprc #########
- auPRC <- function (perf) {
- rec <- perf@x.values
- prec <- perf@y.values
- result <- list()
- for (i in 1:length(perf@x.values)) {
- result[i] <- list(sum((rec[[i]][2:length(rec[[i]])] - rec[[i]][2:length(rec[[i]])-1])*prec[[i]][-1]))
- }
- return(result)
- }
-
- ########## plot ROC and PR-Curve #########
- rocprc <- function(x) {
- sink(NULL,type="message")
- options(warn=-1)
- suppressMessages(suppressWarnings(library('ROCR')))
- svmresult <- data.frame(x)
- colnames(svmresult) <- c("Seqid","Pred","Label", "CV")
-
- linewd <- 1
- wd <- 4
- ht <- 4
- fig.nrows <- 1
- fig.ncols <- 2
- pt <- 10
- cex.general <- 1
- cex.lab <- 0.9
- cex.axis <- 0.9
- cex.main <- 1.2
- cex.legend <- 0.8
-
-
- #pdf("rocprc.pdf", width=wd*fig.ncols, height=ht*fig.nrows)
- png("rocprc.png", width=wd*fig.ncols, height=ht*fig.nrows, unit="in", res=100)
-
- par(xaxs="i", yaxs="i", mar=c(3.5,3.5,2,2)+0.1, mgp=c(2,0.8,0), mfrow=c(fig.nrows, fig.ncols))
-
- CVs <- unique(svmresult[["CV"]])
- preds <- list()
- labs <- list()
- auc <- c()
- for(i in 1:length(CVs)) {
- preds[i] <- subset(svmresult, CV==(i-1), select=c(Pred))
- labs[i] <- subset(svmresult, CV==(i-1), select=c(Label))
- }
-
- pred <- prediction(preds, labs)
- perf_roc <- performance(pred, 'tpr', 'fpr')
- perf_prc <- performance(pred, 'prec', 'rec')
-
- perf_auc <- performance(pred, 'auc')
- prcs <- auPRC(perf_prc)
- avgauc <- 0
- avgprc <- 0
-
- for(j in 1:length(CVs)) {
- avgauc <- avgauc + perf_auc@y.values[[j]]
- avgprc <- avgprc + prcs[[j]]
- }
-
- avgauc <- avgauc/length(CVs)
- avgprc <- avgprc/length(CVs)
-
- #preds_merged <- unlist(preds)
- #labs_merged <- unlist(labs)
- #pred_merged <- prediction(preds_merged, labs_merged)
- #perf_merged_auc <- performance(pred_merged, 'auc')
-
- plot(perf_roc, colorize=T, main="ROC curve", spread.estimate="stderror",
- xlab="1-Specificity", ylab="Sensitivity", cex.lab=1.2)
- text(0.2, 0.1, paste("AUC=", format(avgauc, digits=3, nsmall=3)))
-
- plot(perf_prc, colorize=T, main="P-R curve", spread.estimate="stderror",
- xlab="Recall", ylab="Precision", cex.lab=1.2, xlim=c(0,1), ylim=c(0,1))
- text(0.2, 0.1, paste("AUC=", format(avgprc, digits=3, nsmall=3)))
-
- dev.off()
- }
-
- ############## main function #################
- d <- read.table("${cvpred_data}")
-
- rocprc(d)
-
- </configfile>
- </configfiles>
-
- <help>
-
-**Note**
-
-This tool is based on the ROCR library.  If you use this tool please cite:
-
-Tobias Sing, Oliver Sander, Niko Beerenwinkel, Thomas Lengauer.
-ROCR: visualizing classifier performance in R.
-Bioinformatics 21(20):3940-3941 (2005).
-
-----
-
-**What it does**
-
-Takes as input cross-validation predictions and calculates ROC Curve and its area under curve (AUC) and PR Curve and its AUC.
-
-----
-
-**Results**
-
-ROC Curve: Receiver Operating Characteristic Curve. Compares true positive rate (sensitivity) to false positive rate (1 - specificity).
-
-PR Curve: Precision-Recall Curve. Compares the number of true positives (recall; same as sensitivity) to the number of true positives relative to the total number of sequences classified as positive (precision).
-
-AUC for a given curve: Area Under the Curve. The probability that, for a randomly selected positive/negative pair, the positive will be scored more highly by the trained SVM than the negative.
-
-.. class:: infomark
-
-Both curves measure SVM performance, but ROC curves can be inaccurate if there is a large skew in class distribution.  For more information see:
-
-Jesse Davis, Mark Goadrich.
-The Relationship Between Precision-Recall and ROC Curves.
-Proceedings of the 23rd Annual International Conference on Machine Learning.
-Pittsburgh, PA, 2006.
-
-----
-
-**Example**
-
-.. image:: ./static/images/sample_roc_chen.png
- </help>
-</tool>