Mercurial > repos > guerler > springsuite
annotate spring_mcc.py @ 41:f316caf098a6 draft default tip
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
author | guerler |
---|---|
date | Mon, 01 Mar 2021 15:02:36 +0000 |
parents | 172398348efd |
children |
rev | line source |
---|---|
39
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
1 #! /usr/bin/env python |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
2 import argparse |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
3 import math |
41
f316caf098a6
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents:
39
diff
changeset
|
4 import pandas as pd |
39
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
5 from os.path import isfile |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
6 import re |
41
f316caf098a6
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents:
39
diff
changeset
|
7 |
f316caf098a6
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents:
39
diff
changeset
|
# Names of the experimental methods known to this script.
# NOTE(review): these look like interaction-database "experimental system"
# labels; how the list is consumed is not visible here - confirm at the caller.
METHODS = [
    "Biochemical Activity",
    "Co-fractionation",
    "Co-localization",
    "Far Western",
    "FRET",
    "PCA",
    "Co-crystal Structure",
    "Co-purification",
    "Two-hybrid",
    "Affinity Capture-MS",
]
39
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
18 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
19 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def getIds(rawIds):
    """Split a "|"-delimited identifier string into a list of identifiers."""
    delimiter = "|"
    identifiers = rawIds.split(delimiter)
    return identifiers
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
22 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
23 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def getCenterId(rawId):
    """Return the second "|"-separated field of rawId.

    When rawId contains no "|", it is returned unchanged.
    """
    # Only the second field is ever needed, so stop splitting after it.
    fields = rawId.split("|", 2)
    return fields[1] if len(fields) >= 2 else rawId
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
29 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
30 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def getOrganism(rawId):
    """Return the text following the last "_" in rawId.

    If rawId has no underscore, the whole string is returned.
    """
    _head, _sep, suffix = rawId.rpartition("_")
    return suffix
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
34 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
35 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def getKey(a, b):
    """Build an order-independent key "<larger>_<smaller>" for the pair (a, b)."""
    # Put the greater value first so (a, b) and (b, a) map to the same key.
    first, second = (a, b) if a > b else (b, a)
    return "%s_%s" % (first, second)
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
42 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
43 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def getPercentage(rate, denominator):
    """Return rate / denominator as a percentage, or 0.0 unless denominator > 0."""
    return 100.0 * rate / denominator if denominator > 0 else 0.0
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
48 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
49 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def getFilter(filterName):
    """Load identifiers from a file and group them by organism suffix.

    Each line is split on whitespace and at most its first two columns are
    used. For every such column, the organism (getOrganism) becomes the
    dictionary key and the centre id (getCenterId) is added to that
    organism's set.

    Returns a dict mapping organism -> set of ids.
    """
    print("Loading target organism(s)...")
    organismIds = {}
    with open(filterName) as handle:
        for record in handle:
            # Lines with fewer than two columns contribute what they have.
            for entry in record.split()[:2]:
                centerId = getCenterId(entry)
                organismIds.setdefault(getOrganism(entry), set()).add(centerId)
    print("Organism(s) in set: %s." % organismIds.keys())
    return organismIds
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
67 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
68 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def getReference(fileName, filterA=None, filterB=None, minScore=None, aCol=0,
                 bCol=1, scoreCol=-1, separator=None,
                 skipFirstLine=False, filterValues=list()):
    """Read an interaction table and index its pairs by getKey(a, b).

    Parameters:
        fileName: path of the text file to read.
        filterA, filterB: optional containers of ids; when both are given a
            pair is kept only if one id is in filterA and the other in filterB
            (in either order).
        minScore: optional threshold; reading stops at the first new pair
            whose score is below it.
        aCol, bCol: column indices of the two interaction partners.
        scoreCol: column index of the score; negative means "no score column",
            in which case every pair is stored with 1.0.
        separator: column separator passed to str.split (None = whitespace).
            When given, each partner column is split on "|" into several ids;
            otherwise only the centre id (getCenterId) is used.
        skipFirstLine: discard the first line (e.g. a header).
        filterValues: iterable of (columnIndex, text) pairs; the given column
            must contain text (case-insensitive) for the pair to be kept.
            Lines too short to have that column are not checked against it.

    Returns:
        (index, count): index maps pair key -> score (first occurrence wins);
        count is the number of lines that contributed at least one new key.

    Raises:
        IndexError: if a line has no column aCol / bCol (this includes blank
            lines, which split into an empty list).
        ValueError: if the score column cannot be parsed as float.
    """

    index = dict()
    count = 0
    with open(fileName) as fp:
        line = fp.readline()
        if skipFirstLine:
            line = fp.readline()
        # readline() returns "" only at end of file, which ends the loop.
        while line:
            ls = line.split(separator)
            # skipEntry is never set to True in this function.
            skipEntry = False
            if separator is not None:
                aList = getIds(ls[aCol])
                bList = getIds(ls[bCol])
            else:
                aId = getCenterId(ls[aCol])
                bId = getCenterId(ls[bCol])
                aList = [aId]
                bList = [bId]
            if not skipEntry:
                validEntry = False
                # Consider every combination of ids from the two columns.
                for a in aList:
                    for b in bList:
                        skip = False
                        if a == "-" or b == "-":
                            skip = True
                        if filterA is not None and filterB is not None:
                            # NOTE(review): this assignment replaces the
                            # result of the "-" check above.
                            skip = not ((a in filterA and b in filterB) or (a in filterB and b in filterA))
                        for f in filterValues:
                            if len(ls) > f[0]:
                                columnEntry = ls[f[0]].lower()
                                searchEntry = f[1].lower()
                                if columnEntry.find(searchEntry) == -1:
                                    skip = True
                        if not skip:
                            name = getKey(a, b)
                            if name not in index:
                                validEntry = True
                                if scoreCol >= 0 and len(ls) > scoreCol:
                                    score = float(ls[scoreCol])
                                    skip = False
                                    if minScore is not None:
                                        if minScore > score:
                                            # Stops reading the whole file.
                                            # NOTE(review): presumably relies
                                            # on the input being sorted by
                                            # descending score - confirm.
                                            return index, count
                                    if not skip:
                                        index[name] = score
                                else:
                                    index[name] = 1.0
                if validEntry:
                    count = count + 1
            line = fp.readline()
    return index, count
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
123 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
124 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def getMCC(prediction, positive, positiveCount, negative):
    """Return the best Matthews correlation coefficient over all score cut-offs.

    Predictions are ranked by descending score. After each ranked entry the
    running true/false positive counts are updated and the MCC of "everything
    ranked so far is predicted positive" is evaluated. The largest value found
    is returned, and a summary is printed.

    Parameters:
        prediction: dict mapping pair key -> score.
        positive: container of pair keys regarded as true interactions.
        positiveCount: total number of positives used for the false-negative
            count (passed separately; it is not derived from len(positive)).
        negative: sized container of pair keys regarded as non-interactions.

    Returns:
        The highest MCC found. The initial best is 0.0, so a negative MCC is
        never returned; 0.0 is also the result when no cut-off has a non-zero
        denominator.
    """
    ranked = sorted(prediction.items(), key=lambda item: item[1],
                    reverse=True)
    positiveTotal = positiveCount
    negativeTotal = len(negative)
    topCount = 0
    topMCC = 0.0
    topFP = 0.0
    topTP = 0.0
    topScore = 0.0
    tp = 0
    fp = 0
    for count, (name, score) in enumerate(ranked):
        if name in positive:
            tp += 1
        if name in negative:
            fp += 1
        fn = positiveTotal - tp
        tn = negativeTotal - fp
        denom = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
        # MCC is undefined when any marginal total is zero; skip those cut-offs.
        if denom > 0.0:
            mcc = (tp * tn - fp * fn) / math.sqrt(denom)
            # ">=" keeps the last (lowest-score) cut-off among equal maxima.
            if mcc >= topMCC:
                topMCC = mcc
                topScore = score
                topCount = count
                # Rates are only needed for the reported best cut-off.
                topFP = getPercentage(fp, fp + tn)
                topTP = getPercentage(tp, tp + fn)
    if len(ranked) > 0:
        print("Top ranking prediction %s." % str(ranked[0]))
    print("Total count of prediction set: %s (tp=%1.2f, fp=%1.2f)." % (topCount, topTP, topFP))
    print("Total count of positive set: %s." % len(positive))
    print("Total count of negative set: %s." % len(negative))
    print("Matthews-Correlation-Coefficient: %s at Score >= %s." % (round(topMCC, 2), topScore))
    return topMCC
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
174 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
175 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
176 def getNegativeSet(args, filterA, filterB, negativeRequired, jSize=5): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
177 # determine negative set |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
178 print("Identifying non-interacting pairs...") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
179 negative = set() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
180 if args.negative and isfile(args.negative): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
181 # load from explicit file |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
182 with open(args.negative) as file: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
183 for line in file: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
184 cols = line.split() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
185 nameA = cols[0] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
186 nameB = cols[1] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
187 key = getKey(nameA, nameB) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
188 if key not in negative: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
189 negative.add(key) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
190 else: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
191 if args.region_a and args.region_b: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
192 locations = dict() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
193 regionA = args.region_a.lower() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
194 regionB = args.region_b.lower() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
195 locations[regionA] = list() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
196 locations[regionB] = list() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
197 regions = [regionA, regionB] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
198 print("Filtering regions %s" % str(regions)) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
199 with open(args.locations) as locFile: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
200 for line in locFile: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
201 searchKey = "SUBCELLULAR LOCATION" |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
202 searchPos = line.find(searchKey) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
203 if searchPos != -1: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
204 uniId = line.split()[0] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
205 if uniId not in filterA and uniId not in filterB: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
206 continue |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
207 locStart = searchPos + len(searchKey) + 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
208 locId = line[locStart:] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
209 locId = re.sub(r"\s*{.*}\s*", "", locId) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
210 locId = locId.replace(".", ",") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
211 locId = locId.strip().lower() |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
212 filter_pos = locId.find("note=") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
213 if filter_pos >= 0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
214 locId = locId[:filter_pos] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
215 filter_pos = locId.find(";") |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
216 if filter_pos >= 0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
217 locId = locId[:filter_pos] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
218 if locId: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
219 locId = list(map(lambda x: x.strip(), locId.split(","))) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
220 if (regionA in locId and regionB not in locId): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
221 locations[regionA].append(uniId) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
222 elif (regionA not in locId and regionB in locId): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
223 locations[regionB].append(uniId) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
224 filterAList = sorted(locations[regionA]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
225 filterBList = sorted(locations[regionB]) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
226 else: |
41
f316caf098a6
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents:
39
diff
changeset
|
227 filterAList = sorted(filterA) |
f316caf098a6
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
guerler
parents:
39
diff
changeset
|
228 filterBList = sorted(filterB) |
39
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
229 for i, j in randomPairs(len(filterAList), len(filterBList), jSize): |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
230 nameA = filterAList[i] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
231 nameB = filterBList[j] |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
232 key = getKey(nameA, nameB) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
233 if key not in negative: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
234 negative.add(key) |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
235 negativeRequired = negativeRequired - 1 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
236 if negativeRequired == 0: |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
237 break |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
238 return negative |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
239 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
240 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def randomPairs(iLen, jLen, jSize):
    """Yield index pairs (i, j) covering an iLen x jLen grid in column windows.

    The j axis is cut into consecutive windows of width ``jSize``. For each
    window, every i in ``range(iLen)`` is paired with every j inside the
    window before moving on to the next window. Despite the name, the order
    is fully deterministic.

    Args:
        iLen: number of indices on the first axis.
        jLen: number of indices on the second axis.
        jSize: width of each j window; must be a positive integer to
            produce any pairs.

    Yields:
        Tuples ``(i, j)`` with ``0 <= i < iLen`` and ``0 <= j < jLen``.
    """
    # A non-positive window can never make progress along j; yield nothing
    # instead of looping forever.
    if jSize <= 0:
        return
    # Step by exactly jSize so adjacent windows are contiguous. Advancing by
    # jSize + 1 would silently skip one j index between windows.
    for jStart in range(0, jLen, jSize):
        jMax = min(jStart + jSize, jLen)
        for i in range(iLen):
            for j in range(jStart, jMax):
                yield i, j
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
252 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
253 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
def main(args):
    """Score every BioGRID method against every other and plot the MCCs.

    Each entry of METHODS is used once as the reference (positive) set. For
    that reference, the remaining methods and the prediction file given by
    ``args.input`` are each scored with getMCC. The scores are collected per
    reference method and written as a horizontal bar chart (PNG) to
    ``args.output``, titled with ``args.experiment``.
    """
    # identifier sets taken from the prediction file; with only a single
    # set available it is used for both sides of a pair
    filterSets = getFilter(args.input)
    filterKeys = list(filterSets.keys())
    filterA = filterSets[filterKeys[0]]
    filterB = filterSets[filterKeys[1]] if len(filterKeys) > 1 else filterA

    def loadBiogrid(methodName):
        # read the BioGRID rows whose column 11 equals the given method name
        return getReference(args.biogrid, aCol=23, bCol=26,
                            separator="\t", filterA=filterA,
                            filterB=filterB, skipFirstLine=True,
                            filterValues=[[11, methodName]])

    # one list of scores per reference method
    performance = {}
    for methodReference in METHODS:

        # positive set: interactions reported by the reference method
        print("Loading positive set from BioGRID file (%s)..." % methodReference)
        positive, positiveCount = loadBiogrid(methodReference)

        # negative set sized according to the positive count
        negative = getNegativeSet(args, filterA, filterB, positiveCount)

        # score all other methods; the reference itself gets a 0.0 placeholder
        scores = []
        for method in METHODS:
            if method == methodReference:
                scores.append(0.0)
                continue
            print("Method: %s" % method)
            prediction, _ = loadBiogrid(method)
            scores.append(getMCC(prediction, positive, positiveCount, negative))

        # store the results in the performance dictionary
        performance[methodReference] = scores

        # score the prediction file as the final entry for this reference
        print("Loading prediction file...")
        prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0)
        performance[methodReference].append(
            getMCC(prediction, positive, positiveCount, negative))

    # row labels: every method plus the prediction entry
    yTicks = METHODS + ["SPRING"]

    # render the bar chart and write it out
    print("Producing plot data...")
    print("Total count in prediction file: %d." % len(prediction))
    print("Total count in positive file: %d." % len(positive))
    frame = pd.DataFrame(performance, index=yTicks)
    axes = frame.plot.barh()
    axes.set_title(args.experiment)
    axes.set_xlabel("Matthews-Correlation Coefficient (MCC)")
    figure = axes.get_figure()
    figure.tight_layout()
    figure.savefig(args.output, format="png")
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
318 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
319 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    # The description names the MCC plot this script actually produces
    # (it previously said "ROC plot").
    parser = argparse.ArgumentParser(description='Create MCC plot.')
    parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True)
    parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
    parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False)
    parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False)
    parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False)
    parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False)
    parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results")
    parser.add_argument('-o', '--output', help='Output (png)', required=True)
    args = parser.parse_args()
    main(args)