annotate spring_mcc.py @ 41:f316caf098a6 draft default tip

"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
author guerler
date Mon, 01 Mar 2021 15:02:36 +0000
parents 172398348efd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
39
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
1 #! /usr/bin/env python
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
2 import argparse
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
3 import math
41
f316caf098a6 "planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents: 39
diff changeset
4 import pandas as pd
39
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
5 from os.path import isfile
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
6 import re
41
f316caf098a6 "planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents: 39
diff changeset
7
f316caf098a6 "planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents: 39
diff changeset
# Names of interaction detection methods. Where this list is consumed is
# outside the visible part of the file.
# NOTE(review): these look like experimental-system labels from an
# interaction database -- confirm against the caller.
METHODS = [
    "Biochemical Activity",
    "Co-fractionation",
    "Co-localization",
    "Far Western",
    "FRET",
    "PCA",
    "Co-crystal Structure",
    "Co-purification",
    "Two-hybrid",
    "Affinity Capture-MS",
]
39
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
18
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
19
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getIds(rawIds):
    """Split a pipe-delimited identifier string into a list of its parts.

    A string without any ``|`` yields a single-element list containing
    the unchanged input.
    """
    delimiter = "|"
    return rawIds.split(delimiter)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
22
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
23
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getCenterId(rawId):
    """Return the second ``|``-separated field of ``rawId`` when present.

    Identifiers that contain no ``|`` are returned unchanged.
    """
    parts = rawId.split("|")
    return parts[1] if len(parts) > 1 else rawId
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
29
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
30
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getOrganism(rawId):
    """Return the text following the last underscore in ``rawId``.

    When ``rawId`` contains no underscore the whole string is returned.
    """
    _, _, suffix = rawId.rpartition("_")
    return suffix
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
34
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
35
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getKey(a, b):
    """Build a key for the pair ``(a, b)`` with the greater value first.

    The two values are joined with an underscore. For totally ordered
    values such as strings the result does not depend on argument order.
    """
    first, second = (a, b) if a > b else (b, a)
    return "%s_%s" % (first, second)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
42
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
43
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getPercentage(rate, denominator):
    """Return ``rate`` as a percentage of ``denominator``.

    Yields ``0.0`` whenever ``denominator`` is not strictly positive, so
    the division can never fail on an empty total.
    """
    if not denominator > 0:
        return 0.0
    return 100.0 * rate / denominator
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
48
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
49
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getFilter(filterName):
    """Collect identifiers from a whitespace-delimited file, by organism.

    Only the first two whitespace-separated columns of each line are
    examined; lines with fewer columns contribute whatever they have.
    For every examined entry, the organism is taken from getOrganism()
    and the identifier from getCenterId().

    Returns a dict mapping organism to the set of identifiers seen for it.
    """
    print("Loading target organism(s)...")
    organismIds = dict()
    with open(filterName) as handle:
        for row in handle:
            for entry in row.split()[:2]:
                centerId = getCenterId(entry)
                organismIds.setdefault(getOrganism(entry), set()).add(centerId)
    print("Organism(s) in set: %s." % organismIds.keys())
    return organismIds
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
67
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
68
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getReference(fileName, filterA=None, filterB=None, minScore=None, aCol=0,
                 bCol=1, scoreCol=-1, separator=None,
                 skipFirstLine=False, filterValues=None):
    """Load an index of identifier pairs (and optional scores) from a file.

    Parameters:
        fileName: path of the text file to read.
        filterA, filterB: optional containers of identifiers. When both are
            given, a pair is kept only if one member is in ``filterA`` and
            the other in ``filterB`` (in either orientation).
        minScore: optional threshold. Reading stops and the function
            returns as soon as a new pair scores below it.
            NOTE(review): this presumably relies on the file being sorted
            by descending score -- confirm with the callers.
        aCol, bCol: column indices of the two identifiers.
        scoreCol: column index of the score. With a negative index, or
            when a line has no such column, the pair is stored as 1.0.
        separator: passed to ``str.split``. When ``None`` the line is split
            on whitespace and each identifier column is reduced with
            getCenterId(); otherwise each identifier column is expanded
            with getIds() and every combination is considered.
        skipFirstLine: discard the first line (e.g. a header).
        filterValues: optional iterable of ``(columnIndex, text)`` entries.
            A line is rejected when any referenced column that exists does
            not contain ``text`` (case-insensitive).

    Returns:
        ``(index, count)``. ``index`` maps each pair key from getKey() to
        the score of its first occurrence. ``count`` is the number of
        lines that contributed at least one new pair.
    """
    if filterValues is None:
        filterValues = ()
    useMembership = filterA is not None and filterB is not None
    index = dict()
    count = 0
    with open(fileName) as fp:
        if skipFirstLine:
            next(fp, None)
        for line in fp:
            # Blank lines carry no columns; skip them instead of failing
            # on the column lookup below.
            if not line.strip():
                continue
            # Drop the line terminator so that, with an explicit separator,
            # the last column does not end in a newline.
            ls = line.rstrip("\r\n").split(separator)
            if separator is not None:
                aList = getIds(ls[aCol])
                bList = getIds(ls[bCol])
            else:
                aList = [getCenterId(ls[aCol])]
                bList = [getCenterId(ls[bCol])]
            # The value filters depend only on the line, so evaluate them
            # once rather than for every identifier combination.
            rejected = any(
                len(ls) > f[0] and f[1].lower() not in ls[f[0]].lower()
                for f in filterValues)
            if rejected:
                continue
            validEntry = False
            for a in aList:
                for b in bList:
                    # "-" is a placeholder id; it is never a valid partner,
                    # whether or not membership filters are supplied.
                    if a == "-" or b == "-":
                        continue
                    if useMembership and not (
                            (a in filterA and b in filterB)
                            or (a in filterB and b in filterA)):
                        continue
                    name = getKey(a, b)
                    if name in index:
                        continue
                    validEntry = True
                    if scoreCol >= 0 and len(ls) > scoreCol:
                        score = float(ls[scoreCol])
                        if minScore is not None and minScore > score:
                            return index, count
                        index[name] = score
                    else:
                        index[name] = 1.0
            if validEntry:
                count += 1
    return index, count
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
123
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
124
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def getMCC(prediction, positive, positiveCount, negative):
    """Find the best Matthews correlation coefficient over score cut-offs.

    Predictions are visited in order of descending score. After each one
    the confusion counts are updated and the MCC is evaluated; the highest
    value (ties resolved in favour of the later prediction) is kept.

    Parameters:
        prediction: mapping of pair name to score.
        positive: container of known positive names (membership, ``len``).
        positiveCount: total number of positives, used for false negatives.
        negative: container of known negative names (membership, ``len``).

    Returns:
        The best MCC found; ``0.0`` if no cut-off gives a positive,
        well-defined MCC. A summary is printed to stdout.
    """
    sortedPrediction = sorted(prediction.items(), key=lambda item: item[1],
                              reverse=True)
    positiveTotal = positiveCount
    negativeTotal = len(negative)
    topCount = 0
    topMCC = 0.0
    topFP = 0.0
    topTP = 0.0
    topScore = 0.0
    tp = 0
    fp = 0
    for count, (name, score) in enumerate(sortedPrediction):
        if name in positive:
            tp += 1
        if name in negative:
            fp += 1
        fn = positiveTotal - tp
        tn = negativeTotal - fp
        denom = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
        # The MCC is undefined when any marginal total is zero.
        if denom > 0.0:
            mcc = (tp * tn - fp * fn) / math.sqrt(denom)
            if mcc >= topMCC:
                topMCC = mcc
                topScore = score
                # NOTE(review): this is the zero-based rank of the best
                # prediction, yet it is reported below as a "total count"
                # -- confirm whether count + 1 was intended.
                topCount = count
                # The rates are only reported for the best cut-off.
                topTP = getPercentage(tp, tp + fn)
                topFP = getPercentage(fp, fp + tn)
    if sortedPrediction:
        print("Top ranking prediction %s." % str(sortedPrediction[0]))
        print("Total count of prediction set: %s (tp=%1.2f, fp=%1.2f)." % (topCount, topTP, topFP))
    print("Total count of positive set: %s." % len(positive))
    print("Total count of negative set: %s." % len(negative))
    print("Matthews-Correlation-Coefficient: %s at Score >= %s." % (round(topMCC, 2), topScore))
    return topMCC
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
174
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
175
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
176 def getNegativeSet(args, filterA, filterB, negativeRequired, jSize=5):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
177 # determine negative set
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
178 print("Identifying non-interacting pairs...")
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
179 negative = set()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
180 if args.negative and isfile(args.negative):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
181 # load from explicit file
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
182 with open(args.negative) as file:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
183 for line in file:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
184 cols = line.split()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
185 nameA = cols[0]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
186 nameB = cols[1]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
187 key = getKey(nameA, nameB)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
188 if key not in negative:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
189 negative.add(key)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
190 else:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
191 if args.region_a and args.region_b:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
192 locations = dict()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
193 regionA = args.region_a.lower()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
194 regionB = args.region_b.lower()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
195 locations[regionA] = list()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
196 locations[regionB] = list()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
197 regions = [regionA, regionB]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
198 print("Filtering regions %s" % str(regions))
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
199 with open(args.locations) as locFile:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
200 for line in locFile:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
201 searchKey = "SUBCELLULAR LOCATION"
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
202 searchPos = line.find(searchKey)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
203 if searchPos != -1:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
204 uniId = line.split()[0]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
205 if uniId not in filterA and uniId not in filterB:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
206 continue
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
207 locStart = searchPos + len(searchKey) + 1
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
208 locId = line[locStart:]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
209 locId = re.sub(r"\s*{.*}\s*", "", locId)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
210 locId = locId.replace(".", ",")
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
211 locId = locId.strip().lower()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
212 filter_pos = locId.find("note=")
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
213 if filter_pos >= 0:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
214 locId = locId[:filter_pos]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
215 filter_pos = locId.find(";")
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
216 if filter_pos >= 0:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
217 locId = locId[:filter_pos]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
218 if locId:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
219 locId = list(map(lambda x: x.strip(), locId.split(",")))
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
220 if (regionA in locId and regionB not in locId):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
221 locations[regionA].append(uniId)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
222 elif (regionA not in locId and regionB in locId):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
223 locations[regionB].append(uniId)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
224 filterAList = sorted(locations[regionA])
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
225 filterBList = sorted(locations[regionB])
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
226 else:
41
f316caf098a6 "planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents: 39
diff changeset
227 filterAList = sorted(filterA)
f316caf098a6 "planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents: 39
diff changeset
228 filterBList = sorted(filterB)
39
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
229 for i, j in randomPairs(len(filterAList), len(filterBList), jSize):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
230 nameA = filterAList[i]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
231 nameB = filterBList[j]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
232 key = getKey(nameA, nameB)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
233 if key not in negative:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
234 negative.add(key)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
235 negativeRequired = negativeRequired - 1
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
236 if negativeRequired == 0:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
237 break
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
238 return negative
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
239
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
240
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def randomPairs(iLen, jLen, jSize):
    """Yield index pairs ``(i, j)`` over an ``iLen`` x ``jLen`` grid, band by band.

    The ``j`` axis is cut into consecutive bands of at most ``jSize`` columns.
    For each band, every ``i`` in ``range(iLen)`` is paired with every ``j`` of
    the band before moving on to the next band. Despite the name, the order is
    fully deterministic.

    Args:
        iLen: number of row indices.
        jLen: number of column indices.
        jSize: width of a column band.

    Yields:
        Tuples ``(i, j)`` with ``0 <= i < iLen`` and ``0 <= j < jLen``; each
        pair of the grid is produced exactly once.
    """
    # A non-positive band width can never make progress, and an empty row
    # range has nothing to pair: yield nothing instead of looping.
    if iLen <= 0 or jSize <= 0:
        return
    # Bands are contiguous: each one starts exactly where the previous one
    # ended, so no column between two bands is left out.
    for jStart in range(0, jLen, jSize):
        jMax = min(jStart + jSize, jLen)
        for i in range(iLen):
            for j in range(jStart, jMax):
                yield i, j
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
252
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
253
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
def main(args):
    """Compare BioGRID methods and the prediction file by MCC and plot the result.

    Every entry of ``METHODS`` is used once as the reference: its BioGRID
    interactions form the positive set, a negative set is drawn with
    ``getNegativeSet``, and the MCC of every other method (and of the
    prediction file, labelled "SPRING") is computed against it. The values
    are drawn as a horizontal bar chart and written to ``args.output`` as PNG.

    Args:
        args: parsed command-line namespace. This function reads ``input``,
            ``biogrid``, ``experiment`` and ``output`` directly and hands the
            whole namespace to ``getNegativeSet``.

    Raises:
        ValueError: if ``getFilter`` returns no filter sets for ``args.input``.
    """
    # load source files
    filterSets = getFilter(args.input)
    filterKeys = list(filterSets.keys())
    if not filterKeys:
        # fail with a clear message instead of an IndexError on filterKeys[0]
        raise ValueError("No filter entries found in input file: %s" % args.input)
    filterA = filterSets[filterKeys[0]]
    # with a single filter set, it is used for both sides of a pair
    filterB = filterSets[filterKeys[1]] if len(filterKeys) > 1 else filterA

    def loadBiogrid(method):
        # Load the BioGRID entries for one method.
        # presumably keeps rows whose column 11 equals the method name and
        # reads the two identifiers from columns 23 and 26 - TODO confirm
        # against getReference and the BioGRID column layout.
        filterValues = [[11, method]]
        return getReference(args.biogrid, aCol=23, bCol=26,
                            separator="\t", filterA=filterA,
                            filterB=filterB, skipFirstLine=True,
                            filterValues=filterValues)

    # NOTE(review): the BioGRID file is parsed once per (reference, method)
    # pair and the prediction file once per reference. If getReference has no
    # side effects and getMCC does not modify its arguments, these loads could
    # be done once and reused - confirm before changing.
    performance = dict()
    for methodReference in METHODS:

        # process biogrid database
        print("Loading positive set from BioGRID file (%s)..." % methodReference)
        positive, positiveCount = loadBiogrid(methodReference)

        # estimate negative set
        negative = getNegativeSet(args, filterA, filterB, positiveCount)

        # evaluate other methods
        yValues = list()
        for method in METHODS:
            if methodReference != method:
                print("Method: %s" % method)
                prediction, _ = loadBiogrid(method)
                mcc = getMCC(prediction, positive, positiveCount, negative)
                yValues.append(mcc)
            else:
                # a method is not scored against itself; keep a placeholder
                # so every column has one value per method
                yValues.append(0.0)

        # add results to performance dictionary
        performance[methodReference] = yValues

        # get and append prediction results
        print("Loading prediction file...")
        prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0)
        mcc = getMCC(prediction, positive, positiveCount, negative)
        performance[methodReference].append(mcc)

    # build yTicks: one row per method plus the prediction file itself
    yTicks = METHODS[:]
    yTicks.append("SPRING")

    # create plot
    print("Producing plot data...")
    # these counts refer to the sets of the last reference method processed
    print("Total count in prediction file: %d." % len(prediction))
    print("Total count in positive file: %d." % len(positive))
    df = pd.DataFrame(performance, index=yTicks)
    ax = df.plot.barh()
    ax.set_title(args.experiment)
    ax.set_xlabel("Matthews-Correlation Coefficient (MCC)")
    figure = ax.get_figure()
    figure.tight_layout()
    figure.savefig(args.output, format="png")
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
318
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
319
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    # The description names the plot this script actually produces (an MCC
    # bar chart), so that --help is not misleading.
    parser = argparse.ArgumentParser(description='Create MCC plot.')
    parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True)
    parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
    parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False)
    parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False)
    parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False)
    parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False)
    parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results")
    parser.add_argument('-o', '--output', help='Output (png)', required=True)
    args = parser.parse_args()
    main(args)