# HG changeset patch # User guerler # Date 1614610956 0 # Node ID f316caf098a6996c7f1224c1cff31821c8f8c141 # Parent 06337927c198e936298d387680b3c00a1a1e1938 "planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3" diff -r 06337927c198 -r f316caf098a6 spring_mcc.py --- a/spring_mcc.py Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_mcc.py Mon Mar 01 15:02:36 2021 +0000 @@ -1,9 +1,20 @@ #! /usr/bin/env python import argparse import math +import pandas as pd from os.path import isfile import re -from matplotlib import pyplot as plt + +METHODS = ["Biochemical Activity", + "Co-fractionation", + "Co-localization", + "Far Western", + "FRET", + "PCA", + "Co-crystal Structure", + "Co-purification", + "Two-hybrid", + "Affinity Capture-MS"] def getIds(rawIds): @@ -213,8 +224,8 @@ filterAList = sorted(locations[regionA]) filterBList = sorted(locations[regionB]) else: - filterAList = list(filterA) - filterBList = list(filterB) + filterAList = sorted(filterA) + filterBList = sorted(filterB) for i, j in randomPairs(len(filterAList), len(filterBList), jSize): nameA = filterAList[i] nameB = filterBList[j] @@ -251,56 +262,57 @@ filterB = filterA # identify biogrid filter options - filterValues = list() - filterValues.append([11, args.method]) - - # process biogrid database - print("Loading positive set from BioGRID file...") - positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, - separator="\t", filterA=filterA, - filterB=filterB, skipFirstLine=True, - filterValues=filterValues) + performance = dict() + for methodReference in METHODS: - # estimate negative set - negative = getNegativeSet(args, filterA, filterB, positiveCount) + # process biogrid database + print("Loading positive set from BioGRID file (%s)..." % methodReference) + filterValues = [[11, methodReference]] + positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, + separator="\t", filterA=filterA, + filterB=filterB, skipFirstLine=True, + filterValues=filterValues) - # get prediction results - print("Loading prediction file...") - prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8) - mcc = getMCC(prediction, positive, positiveCount, negative) - yValues = [mcc] - yTicks = ["SPRING"] + # estimate negative set + negative = getNegativeSet(args, filterA, filterB, positiveCount) - # identify biogrid filter options - for method in ["Affinity Capture-MS", - "Biochemical Activity", - "Co-crystal Structure", - "Co-fractionation", - "Co-localization", - "Co-purification", - "Far Western", - "FRET", - "PCA", - "Reconstituted Complex", - "Two-hybrid"]: - if args.method != method: - print("Method: %s" % method) - filterValues = [[11, method]] - prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, - separator="\t", filterA=filterA, - filterB=filterB, skipFirstLine=True, - filterValues=filterValues) - mcc = getMCC(prediction, positive, positiveCount, negative) - yValues.append(mcc) - yTicks.append(method) + # evaluate other methods + yValues = list() + for method in METHODS: + if methodReference != method: + print("Method: %s" % method) + filterValues = [[11, method]] + prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, + separator="\t", filterA=filterA, + filterB=filterB, skipFirstLine=True, + filterValues=filterValues) + mcc = getMCC(prediction, positive, positiveCount, negative) + yValues.append(mcc) + else: + yValues.append(0.0) + + # add results to performance dication + performance[methodReference] = yValues + + # get and append prediction results + print("Loading prediction file...") + prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0) + mcc = getMCC(prediction, positive, positiveCount, negative) + performance[methodReference].append(mcc) + + # build yTicks + yTicks = METHODS[:] + yTicks.append("SPRING") # create plot print("Producing plot data...") print("Total count in prediction file: %d." % len(prediction)) print("Total count in positive file: %d." % len(positive)) - plt.xlabel("Matthews-Correlation Coefficient (MCC)") - plt.title("Positive set: %s" % args.method) - plt.barh(yTicks, yValues) + df = pd.DataFrame(performance, index=yTicks) + ax = df.plot.barh() + ax.set_title(args.experiment) + ax.set_xlabel("Matthews-Correlation Coefficient (MCC)") + plt = ax.get_figure() plt.tight_layout() plt.savefig(args.output, format="png") @@ -314,7 +326,7 @@ parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) - parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False) + parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results") parser.add_argument('-o', '--output', help='Output (png)', required=True) args = parser.parse_args() main(args) diff -r 06337927c198 -r f316caf098a6 spring_mcc.xml --- a/spring_mcc.xml Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_mcc.xml Mon Mar 01 15:02:36 2021 +0000 @@ -2,11 +2,13 @@ plot generator matplotlib + pandas + @@ -50,19 +52,6 @@ - - - - - - - - - - - - - @@ -71,7 +60,6 @@ - diff -r 06337927c198 -r f316caf098a6 spring_model_all.py --- a/spring_model_all.py Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_model_all.py Mon Mar 01 15:02:36 2021 +0000 @@ -34,7 +34,7 @@ mkdir("temp") dbkit = DBKit(args.hhr_index, args.hhr_database) logFile = open(args.log, "w") - logFile.write("#namea\t nameb\t springscore\t tmscore\t energy\t clashes\n") + logFile.write("#namea\t nameb\t springscore\t tmscore\t energy\t clashes\t zscore\t templatea\t templateb\n") with open(args.pairs, "r") as file: for line in file: param = line.split() @@ -52,12 +52,17 @@ modelArgs.set(a_hhr=aFile, b_hhr=bFile, output=output) modelData = createModel(modelArgs) if modelData: - infoStr = "%s\t %s\t %5.2f\t %5.2f\t %5.2f\t %5.2f\n" % (aIdentifier, bIdentifier, - modelData["springscore"], - modelData["tmscore"], - modelData["energy"], - modelData["clashes"]) + infoStr = "%s\t %s\t %5.2f\t %5.2f\t %5.2f\t %5.2f\t %5.2f\t %s\t %s\n" + infoStr = infoStr % (aIdentifier, bIdentifier, + modelData["springscore"], + modelData["tmscore"], + modelData["energy"], + modelData["clashes"], + modelData["zscore"], + modelData["aTemplate"], + modelData["bTemplate"]) logFile.write(infoStr) + logFile.flush() if isfile(aFile): remove(aFile) if isfile(bFile): diff -r 06337927c198 -r f316caf098a6 spring_package/Modeller.py --- a/spring_package/Modeller.py Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_package/Modeller.py Mon Mar 01 15:02:36 2021 +0000 @@ -85,7 +85,7 @@ templateIndex = 0 for i in range(len(alignment)): t = templateAlign[i] - if alignment[i] == ":": + if alignment[i] in [":", "."]: templateResidue = templateResidues[templateIndex] templateResidue["alignedResidue"] = modelAlign[i] aligned.append(templateResidue) @@ -112,7 +112,7 @@ if templateHit["score"] < minScore or maxTries == 0: break maxTries = maxTries - 1 - yield templateHit["templatePair"] + yield templateHit["templatePair"], templateHit["score"] def createModel(args): @@ -139,7 +139,7 @@ maxInfo = None minScore = float(args.minscore) maxTries = int(args.maxtries) - for [aTemplate, bTemplate] in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries): + for [aTemplate, bTemplate], zscore in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries): print("Evaluating Complex Template: %s." % aTemplate) templateFile = "temp/template.pdb" createPDB(aTemplate, pdbDatabase, templateFile) @@ -168,6 +168,7 @@ print(str(e)) continue biomolFound = True + print(" zscore:\t%5.2f" % zscore) tmscore = min(coreScore, partnerScore) print(" tmscore:\t%5.2f" % tmscore) energy = -interfaceEnergy.get(coreAligned, partnerAligned) @@ -178,7 +179,7 @@ print(" springscore:\t%5.2f" % springscore) if springscore > maxScore and clashes < args.maxclashes: maxScore = springscore - maxInfo = dict(springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes) + maxInfo = dict(aTemplate=aTemplate, bTemplate=bTemplate, springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes, zscore=zscore) coreMolecule.save(outputName, chainName="0") partnerMolecule.save(outputName, chainName="1", append=True) if args.showtemplate == "true": @@ -188,7 +189,7 @@ if maxInfo is not None: print("Final Model:") for key in maxInfo: - print(" %s:\t%5.2f" % (key, maxInfo[key])) + print(" %s:\t%s" % (key, maxInfo[key])) print("Completed.") else: print("Warning: Failed to determine model.") diff -r 06337927c198 -r f316caf098a6 spring_package/__pycache__/Modeller.cpython-38.pyc Binary file spring_package/__pycache__/Modeller.cpython-38.pyc has changed diff -r 06337927c198 -r f316caf098a6 spring_package/__pycache__/Modeller.cpython-39.pyc Binary file spring_package/__pycache__/Modeller.cpython-39.pyc has changed diff -r 06337927c198 -r f316caf098a6 test-data/mcc/human_hv1h2.png Binary file test-data/mcc/human_hv1h2.png has changed diff -r 06337927c198 -r f316caf098a6 test-data/model/log.txt --- a/test-data/model/log.txt Sat Jan 23 14:42:46 2021 +0000 +++ b/test-data/model/log.txt Mon Mar 01 15:02:36 2021 +0000 @@ -1,2 +1,2 @@ -#namea nameb springscore tmscore energy clashes -sp|Q9BYF1|ACE2_HUMAN sp|P0DTC2|SPIKE_SARS2 0.91 0.82 -8.92 0.00 +#namea nameb springscore tmscore energy clashes zscore templatea templateb +sp|Q9BYF1|ACE2_HUMAN sp|P0DTC2|SPIKE_SARS2 0.91 0.82 -8.92 0.00 406.00 6LZG_A 6LZG_B