Mercurial > repos > guerler > springsuite
comparison spring_mcc.py @ 41:f316caf098a6 draft default tip
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
author | guerler |
---|---|
date | Mon, 01 Mar 2021 15:02:36 +0000 |
parents | 172398348efd |
children |
comparison
equal
deleted
inserted
replaced
40:06337927c198 | 41:f316caf098a6 |
---|---|
1 #! /usr/bin/env python | 1 #! /usr/bin/env python |
2 import argparse | 2 import argparse |
3 import math | 3 import math |
4 import pandas as pd | |
4 from os.path import isfile | 5 from os.path import isfile |
5 import re | 6 import re |
6 from matplotlib import pyplot as plt | 7 |
8 METHODS = ["Biochemical Activity", | |
9 "Co-fractionation", | |
10 "Co-localization", | |
11 "Far Western", | |
12 "FRET", | |
13 "PCA", | |
14 "Co-crystal Structure", | |
15 "Co-purification", | |
16 "Two-hybrid", | |
17 "Affinity Capture-MS"] | |
7 | 18 |
8 | 19 |
9 def getIds(rawIds): | 20 def getIds(rawIds): |
10 return rawIds.split("|") | 21 return rawIds.split("|") |
11 | 22 |
211 elif (regionA not in locId and regionB in locId): | 222 elif (regionA not in locId and regionB in locId): |
212 locations[regionB].append(uniId) | 223 locations[regionB].append(uniId) |
213 filterAList = sorted(locations[regionA]) | 224 filterAList = sorted(locations[regionA]) |
214 filterBList = sorted(locations[regionB]) | 225 filterBList = sorted(locations[regionB]) |
215 else: | 226 else: |
216 filterAList = list(filterA) | 227 filterAList = sorted(filterA) |
217 filterBList = list(filterB) | 228 filterBList = sorted(filterB) |
218 for i, j in randomPairs(len(filterAList), len(filterBList), jSize): | 229 for i, j in randomPairs(len(filterAList), len(filterBList), jSize): |
219 nameA = filterAList[i] | 230 nameA = filterAList[i] |
220 nameB = filterBList[j] | 231 nameB = filterBList[j] |
221 key = getKey(nameA, nameB) | 232 key = getKey(nameA, nameB) |
222 if key not in negative: | 233 if key not in negative: |
249 filterB = filterSets[filterKeys[1]] | 260 filterB = filterSets[filterKeys[1]] |
250 else: | 261 else: |
251 filterB = filterA | 262 filterB = filterA |
252 | 263 |
253 # identify biogrid filter options | 264 # identify biogrid filter options |
254 filterValues = list() | 265 performance = dict() |
255 filterValues.append([11, args.method]) | 266 for methodReference in METHODS: |
256 | 267 |
257 # process biogrid database | 268 # process biogrid database |
258 print("Loading positive set from BioGRID file...") | 269 print("Loading positive set from BioGRID file (%s)..." % methodReference) |
259 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, | 270 filterValues = [[11, methodReference]] |
260 separator="\t", filterA=filterA, | 271 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, |
261 filterB=filterB, skipFirstLine=True, | 272 separator="\t", filterA=filterA, |
262 filterValues=filterValues) | 273 filterB=filterB, skipFirstLine=True, |
263 | 274 filterValues=filterValues) |
264 # estimate negative set | 275 |
265 negative = getNegativeSet(args, filterA, filterB, positiveCount) | 276 # estimate negative set |
266 | 277 negative = getNegativeSet(args, filterA, filterB, positiveCount) |
267 # get prediction results | 278 |
268 print("Loading prediction file...") | 279 # evaluate other methods |
269 prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8) | 280 yValues = list() |
270 mcc = getMCC(prediction, positive, positiveCount, negative) | 281 for method in METHODS: |
271 yValues = [mcc] | 282 if methodReference != method: |
272 yTicks = ["SPRING"] | 283 print("Method: %s" % method) |
273 | 284 filterValues = [[11, method]] |
274 # identify biogrid filter options | 285 prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, |
275 for method in ["Affinity Capture-MS", | 286 separator="\t", filterA=filterA, |
276 "Biochemical Activity", | 287 filterB=filterB, skipFirstLine=True, |
277 "Co-crystal Structure", | 288 filterValues=filterValues) |
278 "Co-fractionation", | 289 mcc = getMCC(prediction, positive, positiveCount, negative) |
279 "Co-localization", | 290 yValues.append(mcc) |
280 "Co-purification", | 291 else: |
281 "Far Western", | 292 yValues.append(0.0) |
282 "FRET", | 293 |
283 "PCA", | 294 # add results to performance dication |
284 "Reconstituted Complex", | 295 performance[methodReference] = yValues |
285 "Two-hybrid"]: | 296 |
286 if args.method != method: | 297 # get and append prediction results |
287 print("Method: %s" % method) | 298 print("Loading prediction file...") |
288 filterValues = [[11, method]] | 299 prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0) |
289 prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, | 300 mcc = getMCC(prediction, positive, positiveCount, negative) |
290 separator="\t", filterA=filterA, | 301 performance[methodReference].append(mcc) |
291 filterB=filterB, skipFirstLine=True, | 302 |
292 filterValues=filterValues) | 303 # build yTicks |
293 mcc = getMCC(prediction, positive, positiveCount, negative) | 304 yTicks = METHODS[:] |
294 yValues.append(mcc) | 305 yTicks.append("SPRING") |
295 yTicks.append(method) | |
296 | 306 |
297 # create plot | 307 # create plot |
298 print("Producing plot data...") | 308 print("Producing plot data...") |
299 print("Total count in prediction file: %d." % len(prediction)) | 309 print("Total count in prediction file: %d." % len(prediction)) |
300 print("Total count in positive file: %d." % len(positive)) | 310 print("Total count in positive file: %d." % len(positive)) |
301 plt.xlabel("Matthews-Correlation Coefficient (MCC)") | 311 df = pd.DataFrame(performance, index=yTicks) |
302 plt.title("Positive set: %s" % args.method) | 312 ax = df.plot.barh() |
303 plt.barh(yTicks, yValues) | 313 ax.set_title(args.experiment) |
314 ax.set_xlabel("Matthews-Correlation Coefficient (MCC)") | |
315 plt = ax.get_figure() | |
304 plt.tight_layout() | 316 plt.tight_layout() |
305 plt.savefig(args.output, format="png") | 317 plt.savefig(args.output, format="png") |
306 | 318 |
307 | 319 |
308 if __name__ == "__main__": | 320 if __name__ == "__main__": |
312 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) | 324 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) |
313 parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False) | 325 parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False) |
314 parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) | 326 parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) |
315 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) | 327 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) |
316 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) | 328 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) |
317 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False) | 329 parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results") |
318 parser.add_argument('-o', '--output', help='Output (png)', required=True) | 330 parser.add_argument('-o', '--output', help='Output (png)', required=True) |
319 args = parser.parse_args() | 331 args = parser.parse_args() |
320 main(args) | 332 main(args) |