annotate kmersvm/roccurve.xml @ 4:18e2ebf5ff19 draft

Uploaded
author cafletezbrant
date Mon, 20 Aug 2012 21:38:29 -0400
parents cd35ace22905
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
1 <tool id="ROC Curve" name="Plot ROC Curve">
4
18e2ebf5ff19 Uploaded
cafletezbrant
parents: 3
diff changeset
2 <description>using kmerSVM predictions</description>
0
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
3 <command interpreter="sh">r_wrapper.sh $script_file</command>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
4 <inputs>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
5 <param format="tabular" name="cvpred_data" type="data" label="CV Predictions"/>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
6 </inputs>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
7 <outputs>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
8 <data format="png" name="roccurve.png" from_work_dir="roccurve.png" />
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
9 </outputs>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
10
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
11 <configfiles>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
12 <configfile name="script_file">
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
13
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
14 rm(list = objects() )
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
15
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
16 ########## plot ROC curve and report mean AUC #########
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
17 roccurve &lt;- function(x) {
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
18 sink(NULL,type="message")
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
19 options(warn=-1)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
20 suppressMessages(suppressWarnings(library('ROCR')))
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
21 svmresult &lt;- data.frame(x)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
22 colnames(svmresult) &lt;- c("Seqid","Pred","Label", "CV")
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
23
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
24 linewd &lt;- 1
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
25 wd &lt;- 4
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
26 ht &lt;- 4
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
27 fig.nrows &lt;- 1
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
28 fig.ncols &lt;- 1
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
29 pt &lt;- 10
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
30 cex.general &lt;- 1
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
31 cex.lab &lt;- 0.9
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
32 cex.axis &lt;- 0.9
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
33 cex.main &lt;- 1.2
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
34 cex.legend &lt;- 0.8
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
35
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
36 png("roccurve.png", width=wd*fig.ncols, height=ht*fig.nrows, unit="in", res=100)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
37
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
38 par(xaxs="i", yaxs="i", mar=c(3.5,3.5,2,2)+0.1, mgp=c(2,0.8,0), mfrow=c(fig.nrows, fig.ncols))
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
39
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
40 CVs &lt;- unique(svmresult[["CV"]])
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
41 preds &lt;- list()
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
42 labs &lt;- list()
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
43 auc &lt;- c()
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
44 for(i in 1:length(CVs)) {
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
45 preds[i] &lt;- subset(svmresult, CV==(i-1), select=c(Pred))
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
46 labs[i] &lt;- subset(svmresult, CV==(i-1), select=c(Label))
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
47 }
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
48
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
49 pred &lt;- prediction(preds, labs)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
50 perf_roc &lt;- performance(pred, 'tpr', 'fpr')
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
51 perf_auc &lt;- performance(pred, 'auc')
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
52
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
53 avgauc &lt;- 0
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
54
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
55 for(j in 1:length(CVs)) {
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
56 avgauc &lt;- avgauc + perf_auc@y.values[[j]]
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
57 }
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
58
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
59 avgauc &lt;- avgauc/length(CVs)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
60
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
61 plot(perf_roc, colorize=T, main="ROC curve", spread.estimate="stderror",
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
62 xlab="1-Specificity", ylab="Sensitivity", cex.lab=1.2)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
63 text(0.2, 0.1, paste("AUC=", format(avgauc, digits=3, nsmall=3)))
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
64
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
65 dev.off()
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
66 }
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
67
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
68 ############## main function #################
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
69 d &lt;- read.table("${cvpred_data}")
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
70
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
71 roccurve(d)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
72
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
73 </configfile>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
74 </configfiles>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
75
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
76 <help>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
77
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
78 **Note**
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
79
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
80 This tool is based on the ROCR library. If you use this tool please cite:
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
81
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
82 Tobias Sing, Oliver Sander, Niko Beerenwinkel, Thomas Lengauer.
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
83 ROCR: visualizing classifier performance in R.
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
84 Bioinformatics 21(20):3940-3941 (2005).
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
85
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
86 ----
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
87
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
88 **What it does**
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
89
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
90 Takes cross-validation predictions as input and calculates the ROC curve and the area under the curve (AUC).
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
91
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
92 ----
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
93
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
94 **Results**
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
95
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
96 ROC Curve: Receiver Operating Characteristic Curve. Compares true positive rate (sensitivity) to false positive rate (1 - specificity).
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
97
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
98 Area Under the ROC Curve (AUC): Probability that, for a randomly selected positive/negative pair, the positive will be scored more highly by the trained SVM than the negative.
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
99
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
100 .. class:: infomark
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
101
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
102 ROC curves can be inaccurate if there is a large skew in class distribution. For more information see:
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
103
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
104 Jesse Davis, Mark Goadrich.
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
105 The Relationship Between Precision-Recall and ROC Curves.
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
106 Proceedings of the 23rd Annual International Conference on Machine Learning.
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
107 Pittsburgh, PA, 2006.
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
108
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
109 <!--
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
110 **Example**
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
111
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
112 .. image:: ./static/images/sample_roc_chen.png
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
113 -->
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
114 </help>
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
115 </tool>