springsuite: spring_mcc.py comparison

comparison spring_mcc.py @ 41:f316caf098a6 draft default tip

"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"

author	guerler
date	Mon, 01 Mar 2021 15:02:36 +0000
parents	172398348efd
children

comparison

equal deleted inserted replaced

-:06337927c198
+:f316caf098a6
 #! /usr/bin/env python
 import argparse
 import math
+import pandas as pd
 from os.path import isfile
 import re
-from matplotlib import pyplot as plt
+METHODS = ["Biochemical Activity",
+"Co-fractionation",
+"Co-localization",
+"Far Western",
+"FRET",
+"PCA",
+"Co-crystal Structure",
+"Co-purification",
+"Two-hybrid",
+"Affinity Capture-MS"]
 def getIds(rawIds):
 return rawIds.split("|")
 elif (regionA not in locId and regionB in locId):
 locations[regionB].append(uniId)
 filterAList = sorted(locations[regionA])
 filterBList = sorted(locations[regionB])
 else:
-filterAList = list(filterA)
+filterAList = sorted(filterA)
-filterBList = list(filterB)
+filterBList = sorted(filterB)
 for i, j in randomPairs(len(filterAList), len(filterBList), jSize):
 nameA = filterAList[i]
 nameB = filterBList[j]
 key = getKey(nameA, nameB)
 if key not in negative:
 filterB = filterSets[filterKeys[1]]
 else:
 filterB = filterA
 # identify biogrid filter options
-filterValues = list()
+performance = dict()
-filterValues.append([11, args.method])
+for methodReference in METHODS:
 # process biogrid database
-print("Loading positive set from BioGRID file...")
+print("Loading positive set from BioGRID file (%s)..." % methodReference)
-positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26,
+filterValues = [[11, methodReference]]
-separator="\t", filterA=filterA,
+positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26,
-filterB=filterB, skipFirstLine=True,
+separator="\t", filterA=filterA,
-filterValues=filterValues)
+filterB=filterB, skipFirstLine=True,
+filterValues=filterValues)
-# estimate negative set
-negative = getNegativeSet(args, filterA, filterB, positiveCount)
+# estimate negative set
+negative = getNegativeSet(args, filterA, filterB, positiveCount)
-# get prediction results
-print("Loading prediction file...")
+# evaluate other methods
-prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8)
+yValues = list()
-mcc = getMCC(prediction, positive, positiveCount, negative)
+for method in METHODS:
-yValues = [mcc]
+if methodReference != method:
-yTicks = ["SPRING"]
+print("Method: %s" % method)
+filterValues = [[11, method]]
-# identify biogrid filter options
+prediction, _ = getReference(args.biogrid, aCol=23, bCol=26,
-for method in ["Affinity Capture-MS",
+separator="\t", filterA=filterA,
-"Biochemical Activity",
+filterB=filterB, skipFirstLine=True,
-"Co-crystal Structure",
+filterValues=filterValues)
-"Co-fractionation",
+mcc = getMCC(prediction, positive, positiveCount, negative)
-"Co-localization",
+yValues.append(mcc)
-"Co-purification",
+else:
-"Far Western",
+yValues.append(0.0)
-"FRET",
-"PCA",
+# add results to performance dictionary
-"Reconstituted Complex",
+performance[methodReference] = yValues
-"Two-hybrid"]:
-if args.method != method:
+# get and append prediction results
-print("Method: %s" % method)
+print("Loading prediction file...")
-filterValues = [[11, method]]
+prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0)
-prediction, _ = getReference(args.biogrid, aCol=23, bCol=26,
+mcc = getMCC(prediction, positive, positiveCount, negative)
-separator="\t", filterA=filterA,
+performance[methodReference].append(mcc)
-filterB=filterB, skipFirstLine=True,
-filterValues=filterValues)
+# build yTicks
-mcc = getMCC(prediction, positive, positiveCount, negative)
+yTicks = METHODS[:]
-yValues.append(mcc)
+yTicks.append("SPRING")
-yTicks.append(method)
 # create plot
 print("Producing plot data...")
 print("Total count in prediction file: %d." % len(prediction))
 print("Total count in positive file: %d." % len(positive))
-plt.xlabel("Matthews-Correlation Coefficient (MCC)")
+df = pd.DataFrame(performance, index=yTicks)
-plt.title("Positive set: %s" % args.method)
+ax = df.plot.barh()
-plt.barh(yTicks, yValues)
+ax.set_title(args.experiment)
+ax.set_xlabel("Matthews-Correlation Coefficient (MCC)")
+plt = ax.get_figure()
 plt.tight_layout()
 plt.savefig(args.output, format="png")
 if __name__ == "__main__":
 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
 parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False)
 parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False)
 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False)
 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False)
-parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False)
+parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results")
 parser.add_argument('-o', '--output', help='Output (png)', required=True)
 args = parser.parse_args()
 main(args)

Mercurial > repos > guerler > springsuite

comparison spring_mcc.py @ 41:f316caf098a6 draft default tip