Mercurial > repos > guerler > springsuite
annotate spring_mcc.py @ 39:172398348efd draft
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
author | guerler |
---|---|
date | Fri, 22 Jan 2021 15:50:27 +0000 |
parents | |
children | f316caf098a6 |
rev | line source |
---|---|
39
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
1 #! /usr/bin/env python |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
2 import argparse |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
3 import math |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
4 from os.path import isfile |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
5 import re |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
6 from matplotlib import pyplot as plt |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
7 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
8 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
9 def getIds(rawIds): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
10 return rawIds.split("|") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
11 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
12 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
13 def getCenterId(rawId): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
14 elements = rawId.split("|") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
15 if len(elements) > 1: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
16 return elements[1] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
17 return rawId |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
18 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
19 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
20 def getOrganism(rawId): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
21 elements = rawId.split("_") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
22 return elements[-1] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
23 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
24 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
25 def getKey(a, b): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
26 if a > b: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
27 name = "%s_%s" % (a, b) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
28 else: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
29 name = "%s_%s" % (b, a) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
30 return name |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
31 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
32 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
33 def getPercentage(rate, denominator): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
34 if denominator > 0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
35 return 100.0 * rate / denominator |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
36 return 0.0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
37 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
38 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
39 def getFilter(filterName): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
40 print("Loading target organism(s)...") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
41 filterSets = dict() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
42 with open(filterName) as filterFile: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
43 for line in filterFile: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
44 columns = line.split() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
45 for colIndex in [0, 1]: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
46 if colIndex >= len(columns): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
47 break |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
48 colEntry = columns[colIndex] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
49 id = getCenterId(colEntry) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
50 organism = getOrganism(colEntry) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
51 if organism not in filterSets: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
52 filterSets[organism] = set() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
53 filterSets[organism].add(id) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
54 print("Organism(s) in set: %s." % filterSets.keys()) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
55 return filterSets |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
56 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
57 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
58 def getReference(fileName, filterA=None, filterB=None, minScore=None, aCol=0, |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
59 bCol=1, scoreCol=-1, separator=None, |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
60 skipFirstLine=False, filterValues=list()): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
61 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
62 index = dict() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
63 count = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
64 with open(fileName) as fp: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
65 line = fp.readline() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
66 if skipFirstLine: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
67 line = fp.readline() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
68 while line: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
69 ls = line.split(separator) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
70 skipEntry = False |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
71 if separator is not None: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
72 aList = getIds(ls[aCol]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
73 bList = getIds(ls[bCol]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
74 else: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
75 aId = getCenterId(ls[aCol]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
76 bId = getCenterId(ls[bCol]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
77 aList = [aId] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
78 bList = [bId] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
79 if not skipEntry: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
80 validEntry = False |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
81 for a in aList: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
82 for b in bList: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
83 skip = False |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
84 if a == "-" or b == "-": |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
85 skip = True |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
86 if filterA is not None and filterB is not None: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
87 skip = not ((a in filterA and b in filterB) or (a in filterB and b in filterA)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
88 for f in filterValues: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
89 if len(ls) > f[0]: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
90 columnEntry = ls[f[0]].lower() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
91 searchEntry = f[1].lower() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
92 if columnEntry.find(searchEntry) == -1: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
93 skip = True |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
94 if not skip: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
95 name = getKey(a, b) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
96 if name not in index: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
97 validEntry = True |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
98 if scoreCol >= 0 and len(ls) > scoreCol: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
99 score = float(ls[scoreCol]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
100 skip = False |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
101 if minScore is not None: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
102 if minScore > score: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
103 return index, count |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
104 if not skip: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
105 index[name] = score |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
106 else: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
107 index[name] = 1.0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
108 if validEntry: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
109 count = count + 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
110 line = fp.readline() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
111 return index, count |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
112 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
113 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
114 def getMCC(prediction, positive, positiveCount, negative): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
115 sortedPrediction = sorted(prediction.items(), key=lambda x: x[1], |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
116 reverse=True) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
117 positiveTotal = positiveCount |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
118 negativeTotal = len(negative) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
119 x = list([0]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
120 y = list([0]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
121 xMax = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
122 topCount = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
123 topMCC = 0.0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
124 topFP = 0.0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
125 topTP = 0.0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
126 topScore = 0.0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
127 tp = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
128 fp = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
129 count = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
130 for (name, score) in sortedPrediction: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
131 found = False |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
132 if name in positive: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
133 found = True |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
134 tp = tp + 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
135 if name in negative: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
136 found = True |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
137 fp = fp + 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
138 fn = positiveTotal - tp |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
139 tn = negativeTotal - fp |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
140 denom = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
141 yValue = getPercentage(tp, tp + fn) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
142 xValue = getPercentage(fp, fp + tn) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
143 if denom > 0.0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
144 mcc = (tp*tn-fp*fn)/math.sqrt(denom) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
145 if mcc >= topMCC: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
146 topMCC = mcc |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
147 topScore = score |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
148 topCount = count |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
149 topFP = xValue |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
150 topTP = yValue |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
151 if found: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
152 y.append(yValue) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
153 x.append(xValue) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
154 xMax = max(xValue, xMax) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
155 count = count + 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
156 if len(sortedPrediction) > 0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
157 print("Top ranking prediction %s." % str(sortedPrediction[0])) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
158 print("Total count of prediction set: %s (tp=%1.2f, fp=%1.2f)." % (topCount, topTP, topFP)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
159 print("Total count of positive set: %s." % len(positive)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
160 print("Total count of negative set: %s." % len(negative)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
161 print("Matthews-Correlation-Coefficient: %s at Score >= %s." % (round(topMCC, 2), topScore)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
162 return topMCC |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
163 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
164 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
165 def getNegativeSet(args, filterA, filterB, negativeRequired, jSize=5): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
166 # determine negative set |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
167 print("Identifying non-interacting pairs...") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
168 negative = set() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
169 if args.negative and isfile(args.negative): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
170 # load from explicit file |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
171 with open(args.negative) as file: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
172 for line in file: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
173 cols = line.split() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
174 nameA = cols[0] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
175 nameB = cols[1] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
176 key = getKey(nameA, nameB) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
177 if key not in negative: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
178 negative.add(key) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
179 else: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
180 if args.region_a and args.region_b: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
181 locations = dict() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
182 regionA = args.region_a.lower() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
183 regionB = args.region_b.lower() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
184 locations[regionA] = list() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
185 locations[regionB] = list() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
186 regions = [regionA, regionB] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
187 print("Filtering regions %s" % str(regions)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
188 with open(args.locations) as locFile: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
189 for line in locFile: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
190 searchKey = "SUBCELLULAR LOCATION" |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
191 searchPos = line.find(searchKey) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
192 if searchPos != -1: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
193 uniId = line.split()[0] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
194 if uniId not in filterA and uniId not in filterB: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
195 continue |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
196 locStart = searchPos + len(searchKey) + 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
197 locId = line[locStart:] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
198 locId = re.sub(r"\s*{.*}\s*", "", locId) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
199 locId = locId.replace(".", ",") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
200 locId = locId.strip().lower() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
201 filter_pos = locId.find("note=") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
202 if filter_pos >= 0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
203 locId = locId[:filter_pos] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
204 filter_pos = locId.find(";") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
205 if filter_pos >= 0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
206 locId = locId[:filter_pos] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
207 if locId: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
208 locId = list(map(lambda x: x.strip(), locId.split(","))) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
209 if (regionA in locId and regionB not in locId): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
210 locations[regionA].append(uniId) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
211 elif (regionA not in locId and regionB in locId): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
212 locations[regionB].append(uniId) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
213 filterAList = sorted(locations[regionA]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
214 filterBList = sorted(locations[regionB]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
215 else: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
216 filterAList = list(filterA) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
217 filterBList = list(filterB) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
218 for i, j in randomPairs(len(filterAList), len(filterBList), jSize): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
219 nameA = filterAList[i] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
220 nameB = filterBList[j] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
221 key = getKey(nameA, nameB) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
222 if key not in negative: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
223 negative.add(key) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
224 negativeRequired = negativeRequired - 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
225 if negativeRequired == 0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
226 break |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
227 return negative |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
228 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
229 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
230 def randomPairs(iLen, jLen, jSize): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
231 i = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
232 jStart = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
233 while i < iLen: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
234 jMax = min(jStart + jSize, jLen) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
235 for j in range(jStart, jMax): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
236 yield i, j |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
237 i = i + 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
238 if i == iLen and jMax < jLen: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
239 i = 0 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
240 jStart = jStart + jSize + 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
241 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
242 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
243 def main(args): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
244 # load source files |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
245 filterSets = getFilter(args.input) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
246 filterKeys = list(filterSets.keys()) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
247 filterA = filterSets[filterKeys[0]] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
248 if len(filterKeys) > 1: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
249 filterB = filterSets[filterKeys[1]] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
250 else: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
251 filterB = filterA |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
252 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
253 # identify biogrid filter options |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
254 filterValues = list() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
255 filterValues.append([11, args.method]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
256 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
257 # process biogrid database |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
258 print("Loading positive set from BioGRID file...") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
259 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
260 separator="\t", filterA=filterA, |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
261 filterB=filterB, skipFirstLine=True, |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
262 filterValues=filterValues) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
263 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
264 # estimate negative set |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
265 negative = getNegativeSet(args, filterA, filterB, positiveCount) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
266 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
267 # get prediction results |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
268 print("Loading prediction file...") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
269 prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
270 mcc = getMCC(prediction, positive, positiveCount, negative) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
271 yValues = [mcc] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
272 yTicks = ["SPRING"] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
273 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
274 # identify biogrid filter options |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
275 for method in ["Affinity Capture-MS", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
276 "Biochemical Activity", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
277 "Co-crystal Structure", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
278 "Co-fractionation", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
279 "Co-localization", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
280 "Co-purification", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
281 "Far Western", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
282 "FRET", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
283 "PCA", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
284 "Reconstituted Complex", |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
285 "Two-hybrid"]: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
286 if args.method != method: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
287 print("Method: %s" % method) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
288 filterValues = [[11, method]] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
289 prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
290 separator="\t", filterA=filterA, |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
291 filterB=filterB, skipFirstLine=True, |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
292 filterValues=filterValues) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
293 mcc = getMCC(prediction, positive, positiveCount, negative) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
294 yValues.append(mcc) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
295 yTicks.append(method) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
296 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
297 # create plot |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
298 print("Producing plot data...") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
299 print("Total count in prediction file: %d." % len(prediction)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
300 print("Total count in positive file: %d." % len(positive)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
301 plt.xlabel("Matthews-Correlation Coefficient (MCC)") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
302 plt.title("Positive set: %s" % args.method) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
303 plt.barh(yTicks, yValues) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
304 plt.tight_layout() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
305 plt.savefig(args.output, format="png") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
306 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
307 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
308 if __name__ == "__main__": |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
309 parser = argparse.ArgumentParser(description='Create ROC plot.') |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
310 parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
311 parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
312 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
313 parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
314 parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
315 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
316 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
317 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
318 parser.add_argument('-o', '--output', help='Output (png)', required=True) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
319 args = parser.parse_args() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
320 main(args) |