annotate spring_roc.py @ 38:80a4b98121b6 draft

"planemo upload commit 22cd6b0fa88ce0ddc4052beab306f5ba10754f12"
author guerler
date Wed, 25 Nov 2020 17:38:24 +0000
parents 0bcc0a269916
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
29
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
1 #! /usr/bin/env python
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
2 import argparse
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
3 import math
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
4 import random
30
b0e195a47df7 "planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
guerler
parents: 29
diff changeset
5 from os.path import isfile
29
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
6
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
7 from matplotlib import pyplot as plt
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
8
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
9
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
10 def getIds(rawIds):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
11 return rawIds.split("|")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
12
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
13
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
14 def getCenterId(rawId):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
15 elements = rawId.split("|")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
16 if len(elements) > 1:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
17 return elements[1]
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
18 return rawId
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
19
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
20
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
21 def getOrganism(rawId):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
22 elements = rawId.split("_")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
23 return elements[-1]
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
24
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
25
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
26 def getKey(a, b):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
27 if a > b:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
28 name = "%s_%s" % (a, b)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
29 else:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
30 name = "%s_%s" % (b, a)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
31 return name
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
32
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
33
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
34 def getPercentage(rate, denominator):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
35 if denominator > 0:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
36 return 100.0 * rate / denominator
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
37 return 0.0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
38
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
39
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
40 def getFilter(filterName):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
41 print("Loading target organism(s)...")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
42 filterSets = dict()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
43 with open(filterName) as filterFile:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
44 for line in filterFile:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
45 columns = line.split()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
46 for colIndex in [0, 1]:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
47 if colIndex >= len(columns):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
48 break
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
49 colEntry = columns[colIndex]
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
50 id = getCenterId(colEntry)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
51 organism = getOrganism(colEntry)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
52 if organism not in filterSets:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
53 filterSets[organism] = set()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
54 filterSets[organism].add(id)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
55 print("Organism(s) in set: %s." % filterSets.keys())
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
56 return filterSets
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
57
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
58
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
59 def getReference(fileName, filterA=None, filterB=None, minScore=None, aCol=0,
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
60 bCol=1, scoreCol=-1, separator=None,
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
61 skipFirstLine=False, filterValues=list()):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
62 index = dict()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
63 count = 0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
64 with open(fileName) as fp:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
65 line = fp.readline()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
66 if skipFirstLine:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
67 line = fp.readline()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
68 while line:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
69 ls = line.split(separator)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
70 if separator is not None:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
71 aList = getIds(ls[aCol])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
72 bList = getIds(ls[bCol])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
73 else:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
74 aList = [getCenterId(ls[aCol])]
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
75 bList = [getCenterId(ls[bCol])]
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
76 validEntry = False
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
77 for a in aList:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
78 for b in bList:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
79 skip = False
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
80 if a == "-" or b == "-":
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
81 skip = True
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
82 if filterA is not None:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
83 if a not in filterA and b not in filterA:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
84 skip = True
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
85 if filterB is not None:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
86 if a not in filterB and b not in filterB:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
87 skip = True
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
88 for f in filterValues:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
89 if len(ls) > f[0]:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
90 columnEntry = ls[f[0]].lower()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
91 searchEntry = f[1].lower()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
92 if columnEntry.find(searchEntry) == -1:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
93 skip = True
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
94 if not skip:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
95 name = getKey(a, b)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
96 if name not in index:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
97 validEntry = True
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
98 if scoreCol >= 0 and len(ls) > scoreCol:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
99 score = float(ls[scoreCol])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
100 skip = False
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
101 if minScore is not None:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
102 if minScore > score:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
103 return index, count
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
104 if not skip:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
105 index[name] = score
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
106 else:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
107 index[name] = 1.0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
108 if validEntry:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
109 count = count + 1
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
110 line = fp.readline()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
111 return index, count
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
112
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
113
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
114 def getXY(prediction, positive, positiveCount, negative):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
115 sortedPrediction = sorted(prediction.items(), key=lambda x: x[1],
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
116 reverse=True)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
117 positiveTotal = positiveCount
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
118 negativeTotal = len(negative)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
119 x = list([0])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
120 y = list([0])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
121 xMax = 0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
122 topCount = 0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
123 topMCC = 0.0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
124 topPrecision = 0.0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
125 topScore = 0.0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
126 tp = 0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
127 fp = 0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
128 count = 0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
129 for (name, score) in sortedPrediction:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
130 found = False
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
131 if name in positive:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
132 found = True
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
133 tp = tp + 1
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
134 if name in negative:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
135 found = True
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
136 fp = fp + 1
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
137 precision = 0.0
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
138 if tp > 0 or fp > 0:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
139 precision = tp / (tp + fp)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
140 fn = positiveTotal - tp
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
141 tn = negativeTotal - fp
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
142 denom = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
143 if denom > 0.0:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
144 mcc = (tp*tn-fp*fn)/math.sqrt(denom)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
145 if mcc >= topMCC:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
146 topMCC = mcc
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
147 topScore = score
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
148 topCount = count
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
149 topPrecision = precision
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
150 if found:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
151 yValue = getPercentage(tp, tp + fn)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
152 xValue = getPercentage(fp, fp + tn)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
153 y.append(yValue)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
154 x.append(xValue)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
155 xMax = max(xValue, xMax)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
156 count = count + 1
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
157 print("Top ranking prediction %s." % str(sortedPrediction[0]))
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
158 print("Total count of prediction set: %s (precision=%1.2f)." %
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
159 (topCount, topPrecision))
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
160 print("Total count of positive set: %s." % len(positive))
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
161 print("Total count of negative set: %s." % len(negative))
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
162 print("Matthews-Correlation-Coefficient: %s at Score >= %s." %
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
163 (round(topMCC, 2), topScore))
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
164 return x, y, xMax
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
165
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
166
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
167 def main(args):
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
168 # load source files
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
169 filterSets = getFilter(args.input)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
170 filterKeys = list(filterSets.keys())
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
171 filterA = filterSets[filterKeys[0]]
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
172 if len(filterKeys) > 1:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
173 filterB = filterSets[filterKeys[1]]
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
174 else:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
175 filterB = filterA
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
176
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
177 # identify biogrid filter options
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
178 filterValues = []
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
179 if args.method:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
180 filterValues.append([11, args.method])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
181 if args.experiment:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
182 filterValues.append([12, args.experiment])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
183 if args.throughput:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
184 filterValues.append([17, args.throughput])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
185
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
186 # process biogrid database
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
187 print("Loading positive set from BioGRID file...")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
188 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26,
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
189 separator="\t", filterA=filterA,
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
190 filterB=filterB, skipFirstLine=True,
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
191 filterValues=filterValues)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
192
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
193 # rescan biogrid database to identify set of putative interactions
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
194 if filterValues:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
195 print("Filtered entries by (column, value): %s" % filterValues)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
196 print("Loading putative set from BioGRID file...")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
197 putative, putativeCount = getReference(args.biogrid, aCol=23, bCol=26,
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
198 separator="\t", filterA=filterA,
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
199 filterB=filterB,
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
200 skipFirstLine=True)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
201 print("Found %s." % putativeCount)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
202 else:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
203 putative = positive
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
204
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
205 # process prediction file
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
206 print("Loading prediction file...")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
207 prediction, _ = getReference(args.input, scoreCol=2)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
208
34
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
209 # determine negative set
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
210 print("Identifying non-interacting pairs...")
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
211 negative = set()
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
212 if isfile(args.negative):
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
213 # load from explicit file
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
214 with open(args.negative) as file:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
215 for line in file:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
216 cols = line.split()
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
217 nameA = cols[0]
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
218 nameB = cols[1]
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
219 key = getKey(nameA, nameB)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
220 if key not in putative and key not in negative:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
221 negative.add(key)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
222 else:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
223 # get subcellular locations from UniProt export
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
224 locations = dict()
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
225 if isfile(args.locations):
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
226 regions = list()
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
227 if args.regions:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
228 regions = args.regions.split(",")
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
229 with open(args.locations) as locFile:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
230 for line in locFile:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
231 searchKey = "SUBCELLULAR LOCATION"
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
232 searchPos = line.find(searchKey)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
233 if searchPos != -1:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
234 uniId = line.split()[0]
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
235 locStart = searchPos + len(searchKey) + 1
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
236 locId = line[locStart:].split()[0]
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
237 if regions:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
238 if locId not in regions:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
239 continue
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
240 if uniId in filterA or uniId in filterB:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
241 locations[uniId] = locId
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
242 print("Found %d subcellular locations." % (len(list(locations.keys()))))
30
b0e195a47df7 "planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
guerler
parents: 29
diff changeset
243
34
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
244 # randomly sample non-interacting pairs
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
245 filterAList = sorted(list(filterA))
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
246 filterBList = sorted(list(filterB))
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
247 negativeRequired = positiveCount
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
248 random.seed(0)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
249 totalAttempts = int(len(filterAList) * len(filterBList) / 2)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
250 while totalAttempts > 0:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
251 totalAttempts = totalAttempts - 1
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
252 nameA = random.choice(filterAList)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
253 nameB = random.choice(filterBList)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
254 if locations:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
255 if nameA not in locations or nameB not in locations:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
256 continue
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
257 if locations[nameA] == locations[nameB]:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
258 continue
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
259 key = getKey(nameA, nameB)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
260 if key not in putative and key not in negative:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
261 negative.add(key)
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
262 negativeRequired = negativeRequired - 1
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
263 if negativeRequired == 0:
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
264 break
29
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
265
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
266 # create plot
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
267 print("Producing plot data...")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
268 print("Total count in prediction file: %d." % len(prediction))
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
269 print("Total count in positive file: %d." % len(positive))
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
270 plt.ylabel('True Positive Rate (%)')
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
271 plt.xlabel('False Positive Rate (%)')
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
272 title = " vs. ".join(filterSets)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
273 plt.suptitle(title)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
274 if filterValues:
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
275 filterAttributes = list(map(lambda x: x[1], filterValues))
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
276 plt.title("BioGRID filters: %s" % filterAttributes, fontsize=10)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
277 x, y, xMax = getXY(prediction, positive, positiveCount, negative)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
278 plt.plot(x, y)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
279 plt.plot([0, xMax], [0, xMax])
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
280 plt.savefig(args.output, format="png")
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
281
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
282
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
283 if __name__ == "__main__":
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
284 parser = argparse.ArgumentParser(description='Create ROC plot.')
34
b300ddbbf9d0 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 31
diff changeset
285 parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True)
30
b0e195a47df7 "planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
guerler
parents: 29
diff changeset
286 parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
35
0bcc0a269916 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 34
diff changeset
287 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
0bcc0a269916 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 34
diff changeset
288 parser.add_argument('-r', '--regions', help='Comma-separated subcellular locations', required=False)
0bcc0a269916 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 34
diff changeset
289 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False)
0bcc0a269916 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 34
diff changeset
290 parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', required=False)
0bcc0a269916 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 34
diff changeset
291 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False)
0bcc0a269916 "planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents: 34
diff changeset
292 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False)
29
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
293 parser.add_argument('-o', '--output', help='Output (png)', required=True)
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
294 args = parser.parse_args()
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff changeset
295 main(args)