annotate spring_mcc.py @ 39:172398348efd draft

"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
author guerler
date Fri, 22 Jan 2021 15:50:27 +0000
parents
children f316caf098a6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
39
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
1 #! /usr/bin/env python
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
2 import argparse
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
3 import math
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
4 from os.path import isfile
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
5 import re
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
6 from matplotlib import pyplot as plt
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
7
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
8
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getIds(rawIds):
    """Split a "|"-delimited identifier string into its individual ids.

    Returns the list produced by splitting ``rawIds`` on "|"; a string
    without any "|" yields a single-element list.
    """
    return rawIds.split("|")
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
11
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
12
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getCenterId(rawId):
    """Return the second "|"-separated field of ``rawId``.

    For example "sp|P12345|NAME_HUMAN" yields "P12345". An identifier that
    contains no "|" is returned unchanged.
    """
    elements = rawId.split("|")
    return elements[1] if len(elements) > 1 else rawId
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
18
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
19
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getOrganism(rawId):
    """Return the text after the last "_" in ``rawId``.

    When ``rawId`` contains no "_", the whole string is returned.
    """
    # Only the final field is needed, so split once from the right.
    return rawId.rsplit("_", 1)[-1]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
23
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
24
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getKey(a, b):
    """Build an order-independent key for the pair ``(a, b)``.

    The value that compares greater (by ``>``) is placed first and the two
    are joined with "_", so ``getKey(a, b) == getKey(b, a)``.
    """
    first, second = (a, b) if a > b else (b, a)
    return "%s_%s" % (first, second)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
31
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
32
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getPercentage(rate, denominator):
    """Return ``rate`` as a percentage of ``denominator``.

    Returns ``100.0 * rate / denominator`` when ``denominator`` is positive,
    and 0.0 otherwise (which also avoids a division by zero).
    """
    if denominator > 0:
        return 100.0 * rate / denominator
    return 0.0
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
37
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
38
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getFilter(filterName):
    """Load per-organism identifier sets from a whitespace-separated file.

    Only the first two columns of each line are read. For each of them the
    organism is taken with getOrganism() and the identifier with
    getCenterId(); the identifier is added to that organism's set.

    Args:
        filterName: path of the file to read.

    Returns:
        dict mapping organism name to the set of identifiers seen for it.
    """
    print("Loading target organism(s)...")
    filterSets = dict()
    with open(filterName) as filterFile:
        for line in filterFile:
            # Lines with fewer than two columns (including blank lines)
            # simply contribute whatever columns they do have.
            for colEntry in line.split()[:2]:
                organism = getOrganism(colEntry)
                filterSets.setdefault(organism, set()).add(getCenterId(colEntry))
    print("Organism(s) in set: %s." % filterSets.keys())
    return filterSets
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
56
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
57
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getReference(fileName, filterA=None, filterB=None, minScore=None, aCol=0,
                 bCol=1, scoreCol=-1, separator=None,
                 skipFirstLine=False, filterValues=None):
    """Read an interaction table and index its identifier pairs.

    Args:
        fileName: path of the table to read.
        filterA, filterB: optional containers of identifiers. When both are
            given, a pair is kept only if one member is in filterA and the
            other in filterB (in either order).
        minScore: optional threshold. Reading stops, and the results
            gathered so far are returned, at the first accepted pair whose
            score is below it. Presumably the input is sorted by descending
            score; verify against callers.
        aCol, bCol: column indices of the two interaction partners.
        scoreCol: column index of the score; a negative value, or a line too
            short to contain the column, gives the pair a score of 1.0.
        separator: column separator passed to str.split(). With a separator
            each partner column may list several "|"-separated ids and all
            combinations are indexed; without one, the center id of each
            column is used.
        skipFirstLine: skip the first line of the file.
        filterValues: iterable of (column, text) entries. A line is rejected
            when the column exists and does not contain text, compared
            case-insensitively.

    Returns:
        (index, count) where index maps getKey(a, b) to the pair's score and
        count is the number of lines that contributed at least one new pair.
    """
    if filterValues is None:
        filterValues = ()
    useFilter = filterA is not None and filterB is not None
    index = dict()
    count = 0
    with open(fileName) as fp:
        if skipFirstLine:
            next(fp, None)
        for line in fp:
            # Blank lines carry no columns and would raise IndexError below.
            if not line.strip():
                continue
            # Drop the line terminator so it cannot end up inside the last
            # column's identifier when an explicit separator is used.
            ls = line.rstrip("\r\n").split(separator)
            if separator is not None:
                aList = getIds(ls[aCol])
                bList = getIds(ls[bCol])
            else:
                aList = [getCenterId(ls[aCol])]
                bList = [getCenterId(ls[bCol])]
            # The column filters depend only on the line, not on the pair,
            # so evaluate them once per line.
            rejected = False
            for f in filterValues:
                if len(ls) > f[0] and ls[f[0]].lower().find(f[1].lower()) == -1:
                    rejected = True
                    break
            if rejected:
                continue
            validEntry = False
            for a in aList:
                for b in bList:
                    # "-" marks a missing identifier; never index it, even
                    # when filters are active.
                    if a == "-" or b == "-":
                        continue
                    if useFilter and not ((a in filterA and b in filterB)
                                          or (a in filterB and b in filterA)):
                        continue
                    name = getKey(a, b)
                    if name in index:
                        continue
                    validEntry = True
                    if scoreCol >= 0 and len(ls) > scoreCol:
                        score = float(ls[scoreCol])
                        if minScore is not None and minScore > score:
                            return index, count
                        index[name] = score
                    else:
                        index[name] = 1.0
            if validEntry:
                count = count + 1
    return index, count
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
112
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
113
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getMCC(prediction, positive, positiveCount, negative):
    """Find the best Matthews correlation coefficient over a ranked prediction.

    Predictions are visited in order of descending score. After each one the
    confusion counts are updated (a name found in ``positive`` is a true
    positive, a name found in ``negative`` is a false positive) and the MCC
    is evaluated; the highest value seen, with its score, is reported.

    Args:
        prediction: dict mapping pair key to score.
        positive: container of positive pair keys.
        positiveCount: total number of positives used to derive the
            false-negative count.
        negative: sized container of negative pair keys.

    Returns:
        The highest MCC encountered (0.0 if none could be computed).
    """
    # NOTE(review): the nesting of this function was reconstructed from a
    # listing without indentation; confirm that the per-prediction counter
    # advances on every iteration and that only the "Top ranking" message is
    # guarded by the emptiness check.
    sortedPrediction = sorted(prediction.items(), key=lambda x: x[1],
                              reverse=True)
    positiveTotal = positiveCount
    negativeTotal = len(negative)
    topCount = 0
    topMCC = 0.0
    topFP = 0.0
    topTP = 0.0
    topScore = 0.0
    tp = 0
    fp = 0
    # count is the number of predictions ranked before the current one.
    for count, (name, score) in enumerate(sortedPrediction):
        if name in positive:
            tp = tp + 1
        if name in negative:
            fp = fp + 1
        fn = positiveTotal - tp
        tn = negativeTotal - fp
        denom = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)
        # The MCC is undefined when any marginal total is zero.
        if denom > 0.0:
            mcc = (tp*tn-fp*fn)/math.sqrt(denom)
            if mcc >= topMCC:
                topMCC = mcc
                topScore = score
                topCount = count
                topFP = getPercentage(fp, fp + tn)
                topTP = getPercentage(tp, tp + fn)
    if len(sortedPrediction) > 0:
        print("Top ranking prediction %s." % str(sortedPrediction[0]))
    print("Total count of prediction set: %s (tp=%1.2f, fp=%1.2f)." % (topCount, topTP, topFP))
    print("Total count of positive set: %s." % len(positive))
    print("Total count of negative set: %s." % len(negative))
    print("Matthews-Correlation-Coefficient: %s at Score >= %s." % (round(topMCC, 2), topScore))
    return topMCC
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
163
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
164
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getNegativeSet(args, filterA, filterB, negativeRequired, jSize=5):
    """Assemble the set of pair keys treated as non-interacting.

    If ``args.negative`` names an existing file, the pairs are read from its
    first two whitespace-separated columns. Otherwise pairs are generated
    with randomPairs() from two candidate lists until ``negativeRequired``
    new keys have been added:

    * when ``args.region_a`` and ``args.region_b`` are both set, the
      candidates are the ids from ``args.locations`` that belong to filterA
      or filterB and are annotated with exactly one of the two regions;
    * otherwise the candidates are the members of filterA and filterB.

    Args:
        args: object providing ``negative``, ``region_a``, ``region_b`` and
            ``locations`` attributes.
        filterA, filterB: containers of identifiers.
        negativeRequired: number of generated pairs after which to stop.
        jSize: forwarded to randomPairs().

    Returns:
        set of keys built with getKey().
    """
    # determine negative set
    print("Identifying non-interacting pairs...")
    negative = set()
    if args.negative and isfile(args.negative):
        # load from explicit file
        with open(args.negative) as negativeFile:
            for line in negativeFile:
                cols = line.split()
                # Blank or single-column lines cannot describe a pair.
                if len(cols) < 2:
                    continue
                negative.add(getKey(cols[0], cols[1]))
        return negative
    if args.region_a and args.region_b:
        regionA = args.region_a.lower()
        regionB = args.region_b.lower()
        membersA = list()
        membersB = list()
        regions = [regionA, regionB]
        print("Filtering regions %s" % str(regions))
        searchKey = "SUBCELLULAR LOCATION"
        with open(args.locations) as locFile:
            for line in locFile:
                searchPos = line.find(searchKey)
                if searchPos == -1:
                    continue
                uniId = line.split()[0]
                if uniId not in filterA and uniId not in filterB:
                    continue
                # Take the text following the key and normalise it: drop
                # "{...}" blocks, treat "." as a list separator, lowercase.
                locStart = searchPos + len(searchKey) + 1
                locId = line[locStart:]
                locId = re.sub(r"\s*{.*}\s*", "", locId)
                locId = locId.replace(".", ",")
                locId = locId.strip().lower()
                # Keep only the part before any "note=" and before the
                # first ";".
                filter_pos = locId.find("note=")
                if filter_pos >= 0:
                    locId = locId[:filter_pos]
                filter_pos = locId.find(";")
                if filter_pos >= 0:
                    locId = locId[:filter_pos]
                if not locId:
                    continue
                locNames = [entry.strip() for entry in locId.split(",")]
                # Keep ids that are listed for exactly one of the regions.
                if regionA in locNames and regionB not in locNames:
                    membersA.append(uniId)
                elif regionA not in locNames and regionB in locNames:
                    membersB.append(uniId)
        filterAList = sorted(membersA)
        filterBList = sorted(membersB)
    else:
        filterAList = list(filterA)
        filterBList = list(filterB)
    for i, j in randomPairs(len(filterAList), len(filterBList), jSize):
        key = getKey(filterAList[i], filterBList[j])
        if key not in negative:
            negative.add(key)
            negativeRequired = negativeRequired - 1
            if negativeRequired == 0:
                break
    return negative
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
228
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
229
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def randomPairs(iLen, jLen, jSize):
    """Yield every index pair ``(i, j)`` in windows of ``jSize`` columns.

    Despite the name the order is deterministic: the ``j`` range is cut
    into consecutive windows of ``jSize`` indices and, for each window,
    every ``i`` is paired with every ``j`` of that window before the next
    window starts.

    Args:
        iLen: Number of ``i`` indices; pairs use ``0 .. iLen - 1``.
        jLen: Number of ``j`` indices; pairs use ``0 .. jLen - 1``.
        jSize: Number of ``j`` indices per window.

    Yields:
        Tuples ``(i, j)``. Nothing is yielded if ``iLen`` or ``jLen`` is
        zero or if ``jSize`` is not positive.
    """
    # a non-positive window can never make progress, so produce nothing
    # instead of spinning forever
    if jSize <= 0:
        return
    # step by exactly jSize: range() below already excludes jMax, so the
    # next window has to start at jMax or that index would be skipped
    for jStart in range(0, jLen, jSize):
        jMax = min(jStart + jSize, jLen)
        for i in range(iLen):
            for j in range(jStart, jMax):
                yield i, j
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
241
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
242
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def main(args):
    """Plot the MCC of the prediction file next to each BioGRID method.

    The BioGRID entries selected by ``args.method`` form the positive set
    and ``getNegativeSet`` supplies the negative set. The prediction file
    and every other BioGRID method listed below are scored against these
    sets with ``getMCC``; the scores are saved as a horizontal bar chart
    to ``args.output`` in PNG format.

    Args:
        args: Parsed command line arguments. This function reads
            ``input``, ``biogrid``, ``method`` and ``output`` and passes
            the whole namespace on to ``getNegativeSet``.
    """
    # load source files
    filterSets = getFilter(args.input)
    filterKeys = list(filterSets.keys())
    filterA = filterSets[filterKeys[0]]
    # with only one filter set available it is paired with itself
    filterB = filterSets[filterKeys[1]] if len(filterKeys) > 1 else filterA

    # options shared by every BioGRID lookup; rows are selected by matching
    # column 11 against a method name (presumably the experimental system
    # column -- confirm against getReference)
    biogridOptions = dict(aCol=23, bCol=26, separator="\t", filterA=filterA,
                          filterB=filterB, skipFirstLine=True)

    # process biogrid database
    print("Loading positive set from BioGRID file...")
    positive, positiveCount = getReference(args.biogrid,
                                           filterValues=[[11, args.method]],
                                           **biogridOptions)

    # estimate negative set
    negative = getNegativeSet(args, filterA, filterB, positiveCount)

    # get prediction results
    print("Loading prediction file...")
    prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8)
    yValues = [getMCC(prediction, positive, positiveCount, negative)]
    yTicks = ["SPRING"]

    # score every other BioGRID method against the same reference sets
    for method in ["Affinity Capture-MS",
                   "Biochemical Activity",
                   "Co-crystal Structure",
                   "Co-fractionation",
                   "Co-localization",
                   "Co-purification",
                   "Far Western",
                   "FRET",
                   "PCA",
                   "Reconstituted Complex",
                   "Two-hybrid"]:
        if args.method != method:
            print("Method: %s" % method)
            # kept in its own variable so that `prediction` still refers
            # to the prediction file when its size is reported below
            methodSet, _ = getReference(args.biogrid,
                                        filterValues=[[11, method]],
                                        **biogridOptions)
            yValues.append(getMCC(methodSet, positive, positiveCount,
                                  negative))
            yTicks.append(method)

    # create plot
    print("Producing plot data...")
    print("Total count in prediction file: %d." % len(prediction))
    print("Total count in positive file: %d." % len(positive))
    plt.xlabel("Matthews-Correlation Coefficient (MCC)")
    plt.title("Positive set: %s" % args.method)
    plt.barh(yTicks, yValues)
    plt.tight_layout()
    plt.savefig(args.output, format="png")
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
306
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
307
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
if __name__ == "__main__":
    # Command line entry point: collect the options and hand them to main().
    # The description names the MCC plot that main() actually produces.
    parser = argparse.ArgumentParser(description='Create MCC plot.')
    parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True)
    parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
    parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False)
    parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False)
    parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False)
    parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False)
    parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False)
    parser.add_argument('-o', '--output', help='Output (png)', required=True)
    args = parser.parse_args()
    main(args)