Mercurial > repos > guerler > springsuite
diff spring_mcc.py @ 41:f316caf098a6 draft default tip
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
author | guerler |
---|---|
date | Mon, 01 Mar 2021 15:02:36 +0000 |
parents | 172398348efd |
children |
line wrap: on
line diff
--- a/spring_mcc.py Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_mcc.py Mon Mar 01 15:02:36 2021 +0000 @@ -1,9 +1,20 @@ #! /usr/bin/env python import argparse import math +import pandas as pd from os.path import isfile import re -from matplotlib import pyplot as plt + +METHODS = ["Biochemical Activity", + "Co-fractionation", + "Co-localization", + "Far Western", + "FRET", + "PCA", + "Co-crystal Structure", + "Co-purification", + "Two-hybrid", + "Affinity Capture-MS"] def getIds(rawIds): @@ -213,8 +224,8 @@ filterAList = sorted(locations[regionA]) filterBList = sorted(locations[regionB]) else: - filterAList = list(filterA) - filterBList = list(filterB) + filterAList = sorted(filterA) + filterBList = sorted(filterB) for i, j in randomPairs(len(filterAList), len(filterBList), jSize): nameA = filterAList[i] nameB = filterBList[j] @@ -251,56 +262,57 @@ filterB = filterA # identify biogrid filter options - filterValues = list() - filterValues.append([11, args.method]) - - # process biogrid database - print("Loading positive set from BioGRID file...") - positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, - separator="\t", filterA=filterA, - filterB=filterB, skipFirstLine=True, - filterValues=filterValues) + performance = dict() + for methodReference in METHODS: - # estimate negative set - negative = getNegativeSet(args, filterA, filterB, positiveCount) + # process biogrid database + print("Loading positive set from BioGRID file (%s)..." 
% methodReference) + filterValues = [[11, methodReference]] + positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, + separator="\t", filterA=filterA, + filterB=filterB, skipFirstLine=True, + filterValues=filterValues) - # get prediction results - print("Loading prediction file...") - prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8) - mcc = getMCC(prediction, positive, positiveCount, negative) - yValues = [mcc] - yTicks = ["SPRING"] + # estimate negative set + negative = getNegativeSet(args, filterA, filterB, positiveCount) - # identify biogrid filter options - for method in ["Affinity Capture-MS", - "Biochemical Activity", - "Co-crystal Structure", - "Co-fractionation", - "Co-localization", - "Co-purification", - "Far Western", - "FRET", - "PCA", - "Reconstituted Complex", - "Two-hybrid"]: - if args.method != method: - print("Method: %s" % method) - filterValues = [[11, method]] - prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, - separator="\t", filterA=filterA, - filterB=filterB, skipFirstLine=True, - filterValues=filterValues) - mcc = getMCC(prediction, positive, positiveCount, negative) - yValues.append(mcc) - yTicks.append(method) + # evaluate other methods + yValues = list() + for method in METHODS: + if methodReference != method: + print("Method: %s" % method) + filterValues = [[11, method]] + prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, + separator="\t", filterA=filterA, + filterB=filterB, skipFirstLine=True, + filterValues=filterValues) + mcc = getMCC(prediction, positive, positiveCount, negative) + yValues.append(mcc) + else: + yValues.append(0.0) + + # add results to performance dictionary + performance[methodReference] = yValues + + # get and append prediction results + print("Loading prediction file...") + prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0) + mcc = getMCC(prediction, positive, positiveCount, negative) + performance[methodReference].append(mcc) + + # build 
yTicks + yTicks = METHODS[:] + yTicks.append("SPRING") # create plot print("Producing plot data...") print("Total count in prediction file: %d." % len(prediction)) print("Total count in positive file: %d." % len(positive)) - plt.xlabel("Matthews-Correlation Coefficient (MCC)") - plt.title("Positive set: %s" % args.method) - plt.barh(yTicks, yValues) + df = pd.DataFrame(performance, index=yTicks) + ax = df.plot.barh() + ax.set_title(args.experiment) + ax.set_xlabel("Matthews-Correlation Coefficient (MCC)") + plt = ax.get_figure() plt.tight_layout() plt.savefig(args.output, format="png") @@ -314,7 +326,7 @@ parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) - parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False) + parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results") parser.add_argument('-o', '--output', help='Output (png)', required=True) args = parser.parse_args() main(args)