Mercurial > repos > guerler > springsuite
annotate spring_roc.py @ 36:2fe8ffff530d draft
"planemo upload commit 135985d861c02f01fb4fb139834dc4d9b9521f53-dirty"
author | guerler |
---|---|
date | Tue, 24 Nov 2020 18:16:01 +0000 |
parents | 0bcc0a269916 |
children |
rev | line source |
---|---|
29
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
1 #! /usr/bin/env python |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
2 import argparse |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
3 import math |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
4 import random |
30
b0e195a47df7
"planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
guerler
parents:
29
diff
changeset
|
5 from os.path import isfile |
29
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
6 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
7 from matplotlib import pyplot as plt |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
8 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
9 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
10 def getIds(rawIds): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
11 return rawIds.split("|") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
12 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
13 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
14 def getCenterId(rawId): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
15 elements = rawId.split("|") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
16 if len(elements) > 1: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
17 return elements[1] |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
18 return rawId |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
19 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
20 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
21 def getOrganism(rawId): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
22 elements = rawId.split("_") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
23 return elements[-1] |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
24 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
25 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
26 def getKey(a, b): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
27 if a > b: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
28 name = "%s_%s" % (a, b) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
29 else: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
30 name = "%s_%s" % (b, a) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
31 return name |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
32 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
33 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
34 def getPercentage(rate, denominator): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
35 if denominator > 0: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
36 return 100.0 * rate / denominator |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
37 return 0.0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
38 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
39 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
40 def getFilter(filterName): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
41 print("Loading target organism(s)...") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
42 filterSets = dict() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
43 with open(filterName) as filterFile: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
44 for line in filterFile: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
45 columns = line.split() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
46 for colIndex in [0, 1]: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
47 if colIndex >= len(columns): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
48 break |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
49 colEntry = columns[colIndex] |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
50 id = getCenterId(colEntry) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
51 organism = getOrganism(colEntry) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
52 if organism not in filterSets: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
53 filterSets[organism] = set() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
54 filterSets[organism].add(id) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
55 print("Organism(s) in set: %s." % filterSets.keys()) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
56 return filterSets |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
57 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
58 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
59 def getReference(fileName, filterA=None, filterB=None, minScore=None, aCol=0, |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
60 bCol=1, scoreCol=-1, separator=None, |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
61 skipFirstLine=False, filterValues=list()): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
62 index = dict() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
63 count = 0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
64 with open(fileName) as fp: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
65 line = fp.readline() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
66 if skipFirstLine: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
67 line = fp.readline() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
68 while line: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
69 ls = line.split(separator) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
70 if separator is not None: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
71 aList = getIds(ls[aCol]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
72 bList = getIds(ls[bCol]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
73 else: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
74 aList = [getCenterId(ls[aCol])] |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
75 bList = [getCenterId(ls[bCol])] |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
76 validEntry = False |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
77 for a in aList: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
78 for b in bList: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
79 skip = False |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
80 if a == "-" or b == "-": |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
81 skip = True |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
82 if filterA is not None: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
83 if a not in filterA and b not in filterA: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
84 skip = True |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
85 if filterB is not None: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
86 if a not in filterB and b not in filterB: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
87 skip = True |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
88 for f in filterValues: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
89 if len(ls) > f[0]: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
90 columnEntry = ls[f[0]].lower() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
91 searchEntry = f[1].lower() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
92 if columnEntry.find(searchEntry) == -1: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
93 skip = True |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
94 if not skip: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
95 name = getKey(a, b) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
96 if name not in index: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
97 validEntry = True |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
98 if scoreCol >= 0 and len(ls) > scoreCol: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
99 score = float(ls[scoreCol]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
100 skip = False |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
101 if minScore is not None: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
102 if minScore > score: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
103 return index, count |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
104 if not skip: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
105 index[name] = score |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
106 else: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
107 index[name] = 1.0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
108 if validEntry: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
109 count = count + 1 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
110 line = fp.readline() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
111 return index, count |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
112 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
113 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
114 def getXY(prediction, positive, positiveCount, negative): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
115 sortedPrediction = sorted(prediction.items(), key=lambda x: x[1], |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
116 reverse=True) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
117 positiveTotal = positiveCount |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
118 negativeTotal = len(negative) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
119 x = list([0]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
120 y = list([0]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
121 xMax = 0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
122 topCount = 0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
123 topMCC = 0.0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
124 topPrecision = 0.0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
125 topScore = 0.0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
126 tp = 0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
127 fp = 0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
128 count = 0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
129 for (name, score) in sortedPrediction: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
130 found = False |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
131 if name in positive: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
132 found = True |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
133 tp = tp + 1 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
134 if name in negative: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
135 found = True |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
136 fp = fp + 1 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
137 precision = 0.0 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
138 if tp > 0 or fp > 0: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
139 precision = tp / (tp + fp) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
140 fn = positiveTotal - tp |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
141 tn = negativeTotal - fp |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
142 denom = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
143 if denom > 0.0: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
144 mcc = (tp*tn-fp*fn)/math.sqrt(denom) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
145 if mcc >= topMCC: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
146 topMCC = mcc |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
147 topScore = score |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
148 topCount = count |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
149 topPrecision = precision |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
150 if found: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
151 yValue = getPercentage(tp, tp + fn) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
152 xValue = getPercentage(fp, fp + tn) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
153 y.append(yValue) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
154 x.append(xValue) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
155 xMax = max(xValue, xMax) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
156 count = count + 1 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
157 print("Top ranking prediction %s." % str(sortedPrediction[0])) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
158 print("Total count of prediction set: %s (precision=%1.2f)." % |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
159 (topCount, topPrecision)) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
160 print("Total count of positive set: %s." % len(positive)) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
161 print("Total count of negative set: %s." % len(negative)) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
162 print("Matthews-Correlation-Coefficient: %s at Score >= %s." % |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
163 (round(topMCC, 2), topScore)) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
164 return x, y, xMax |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
165 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
166 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
167 def main(args): |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
168 # load source files |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
169 filterSets = getFilter(args.input) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
170 filterKeys = list(filterSets.keys()) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
171 filterA = filterSets[filterKeys[0]] |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
172 if len(filterKeys) > 1: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
173 filterB = filterSets[filterKeys[1]] |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
174 else: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
175 filterB = filterA |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
176 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
177 # identify biogrid filter options |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
178 filterValues = [] |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
179 if args.method: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
180 filterValues.append([11, args.method]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
181 if args.experiment: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
182 filterValues.append([12, args.experiment]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
183 if args.throughput: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
184 filterValues.append([17, args.throughput]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
185 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
186 # process biogrid database |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
187 print("Loading positive set from BioGRID file...") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
188 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
189 separator="\t", filterA=filterA, |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
190 filterB=filterB, skipFirstLine=True, |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
191 filterValues=filterValues) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
192 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
193 # rescan biogrid database to identify set of putative interactions |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
194 if filterValues: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
195 print("Filtered entries by (column, value): %s" % filterValues) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
196 print("Loading putative set from BioGRID file...") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
197 putative, putativeCount = getReference(args.biogrid, aCol=23, bCol=26, |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
198 separator="\t", filterA=filterA, |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
199 filterB=filterB, |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
200 skipFirstLine=True) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
201 print("Found %s." % putativeCount) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
202 else: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
203 putative = positive |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
204 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
205 # process prediction file |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
206 print("Loading prediction file...") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
207 prediction, _ = getReference(args.input, scoreCol=2) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
208 |
34
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
209 # determine negative set |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
210 print("Identifying non-interacting pairs...") |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
211 negative = set() |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
212 if isfile(args.negative): |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
213 # load from explicit file |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
214 with open(args.negative) as file: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
215 for line in file: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
216 cols = line.split() |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
217 nameA = cols[0] |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
218 nameB = cols[1] |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
219 key = getKey(nameA, nameB) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
220 if key not in putative and key not in negative: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
221 negative.add(key) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
222 else: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
223 # get subcellular locations from UniProt export |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
224 locations = dict() |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
225 if isfile(args.locations): |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
226 regions = list() |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
227 if args.regions: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
228 regions = args.regions.split(",") |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
229 with open(args.locations) as locFile: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
230 for line in locFile: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
231 searchKey = "SUBCELLULAR LOCATION" |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
232 searchPos = line.find(searchKey) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
233 if searchPos != -1: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
234 uniId = line.split()[0] |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
235 locStart = searchPos + len(searchKey) + 1 |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
236 locId = line[locStart:].split()[0] |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
237 if regions: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
238 if locId not in regions: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
239 continue |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
240 if uniId in filterA or uniId in filterB: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
241 locations[uniId] = locId |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
242 print("Found %d subcellular locations." % (len(list(locations.keys())))) |
30
b0e195a47df7
"planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
guerler
parents:
29
diff
changeset
|
243 |
34
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
244 # randomly sample non-interacting pairs |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
245 filterAList = sorted(list(filterA)) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
246 filterBList = sorted(list(filterB)) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
247 negativeRequired = positiveCount |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
248 random.seed(0) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
249 totalAttempts = int(len(filterAList) * len(filterBList) / 2) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
250 while totalAttempts > 0: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
251 totalAttempts = totalAttempts - 1 |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
252 nameA = random.choice(filterAList) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
253 nameB = random.choice(filterBList) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
254 if locations: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
255 if nameA not in locations or nameB not in locations: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
256 continue |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
257 if locations[nameA] == locations[nameB]: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
258 continue |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
259 key = getKey(nameA, nameB) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
260 if key not in putative and key not in negative: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
261 negative.add(key) |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
262 negativeRequired = negativeRequired - 1 |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
263 if negativeRequired == 0: |
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
264 break |
29
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
265 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
266 # create plot |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
267 print("Producing plot data...") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
268 print("Total count in prediction file: %d." % len(prediction)) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
269 print("Total count in positive file: %d." % len(positive)) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
270 plt.ylabel('True Positive Rate (%)') |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
271 plt.xlabel('False Positive Rate (%)') |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
272 title = " vs. ".join(filterSets) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
273 plt.suptitle(title) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
274 if filterValues: |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
275 filterAttributes = list(map(lambda x: x[1], filterValues)) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
276 plt.title("BioGRID filters: %s" % filterAttributes, fontsize=10) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
277 x, y, xMax = getXY(prediction, positive, positiveCount, negative) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
278 plt.plot(x, y) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
279 plt.plot([0, xMax], [0, xMax]) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
280 plt.savefig(args.output, format="png") |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
281 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
282 |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
283 if __name__ == "__main__": |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
284 parser = argparse.ArgumentParser(description='Create ROC plot.') |
34
b300ddbbf9d0
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
31
diff
changeset
|
285 parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True) |
30
b0e195a47df7
"planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
guerler
parents:
29
diff
changeset
|
286 parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True) |
35
0bcc0a269916
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
34
diff
changeset
|
287 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) |
0bcc0a269916
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
34
diff
changeset
|
288 parser.add_argument('-r', '--regions', help='Comma-separated subcellular locations', required=False) |
0bcc0a269916
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
34
diff
changeset
|
289 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) |
0bcc0a269916
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
34
diff
changeset
|
290 parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', required=False) |
0bcc0a269916
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
34
diff
changeset
|
291 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) |
0bcc0a269916
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
guerler
parents:
34
diff
changeset
|
292 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False) |
29
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
293 parser.add_argument('-o', '--output', help='Output (png)', required=True) |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
294 args = parser.parse_args() |
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
diff
changeset
|
295 main(args) |