# HG changeset patch
# User guerler
# Date 1614610956 0
# Node ID f316caf098a6996c7f1224c1cff31821c8f8c141
# Parent 06337927c198e936298d387680b3c00a1a1e1938
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
diff -r 06337927c198 -r f316caf098a6 spring_mcc.py
--- a/spring_mcc.py Sat Jan 23 14:42:46 2021 +0000
+++ b/spring_mcc.py Mon Mar 01 15:02:36 2021 +0000
@@ -1,9 +1,20 @@
#! /usr/bin/env python
import argparse
import math
+import pandas as pd
from os.path import isfile
import re
-from matplotlib import pyplot as plt
+
+METHODS = ["Biochemical Activity",
+ "Co-fractionation",
+ "Co-localization",
+ "Far Western",
+ "FRET",
+ "PCA",
+ "Co-crystal Structure",
+ "Co-purification",
+ "Two-hybrid",
+ "Affinity Capture-MS"]
def getIds(rawIds):
@@ -213,8 +224,8 @@
filterAList = sorted(locations[regionA])
filterBList = sorted(locations[regionB])
else:
- filterAList = list(filterA)
- filterBList = list(filterB)
+ filterAList = sorted(filterA)
+ filterBList = sorted(filterB)
for i, j in randomPairs(len(filterAList), len(filterBList), jSize):
nameA = filterAList[i]
nameB = filterBList[j]
@@ -251,56 +262,57 @@
filterB = filterA
# identify biogrid filter options
- filterValues = list()
- filterValues.append([11, args.method])
-
- # process biogrid database
- print("Loading positive set from BioGRID file...")
- positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26,
- separator="\t", filterA=filterA,
- filterB=filterB, skipFirstLine=True,
- filterValues=filterValues)
+ performance = dict()
+ for methodReference in METHODS:
- # estimate negative set
- negative = getNegativeSet(args, filterA, filterB, positiveCount)
+ # process biogrid database
+ print("Loading positive set from BioGRID file (%s)..." % methodReference)
+ filterValues = [[11, methodReference]]
+ positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26,
+ separator="\t", filterA=filterA,
+ filterB=filterB, skipFirstLine=True,
+ filterValues=filterValues)
- # get prediction results
- print("Loading prediction file...")
- prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8)
- mcc = getMCC(prediction, positive, positiveCount, negative)
- yValues = [mcc]
- yTicks = ["SPRING"]
+ # estimate negative set
+ negative = getNegativeSet(args, filterA, filterB, positiveCount)
- # identify biogrid filter options
- for method in ["Affinity Capture-MS",
- "Biochemical Activity",
- "Co-crystal Structure",
- "Co-fractionation",
- "Co-localization",
- "Co-purification",
- "Far Western",
- "FRET",
- "PCA",
- "Reconstituted Complex",
- "Two-hybrid"]:
- if args.method != method:
- print("Method: %s" % method)
- filterValues = [[11, method]]
- prediction, _ = getReference(args.biogrid, aCol=23, bCol=26,
- separator="\t", filterA=filterA,
- filterB=filterB, skipFirstLine=True,
- filterValues=filterValues)
- mcc = getMCC(prediction, positive, positiveCount, negative)
- yValues.append(mcc)
- yTicks.append(method)
+ # evaluate other methods
+ yValues = list()
+ for method in METHODS:
+ if methodReference != method:
+ print("Method: %s" % method)
+ filterValues = [[11, method]]
+ prediction, _ = getReference(args.biogrid, aCol=23, bCol=26,
+ separator="\t", filterA=filterA,
+ filterB=filterB, skipFirstLine=True,
+ filterValues=filterValues)
+ mcc = getMCC(prediction, positive, positiveCount, negative)
+ yValues.append(mcc)
+ else:
+ yValues.append(0.0)
+
+ # add results to performance dictionary
+ performance[methodReference] = yValues
+
+ # get and append prediction results
+ print("Loading prediction file...")
+ prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0)
+ mcc = getMCC(prediction, positive, positiveCount, negative)
+ performance[methodReference].append(mcc)
+
+ # build yTicks
+ yTicks = METHODS[:]
+ yTicks.append("SPRING")
# create plot
print("Producing plot data...")
print("Total count in prediction file: %d." % len(prediction))
print("Total count in positive file: %d." % len(positive))
- plt.xlabel("Matthews-Correlation Coefficient (MCC)")
- plt.title("Positive set: %s" % args.method)
- plt.barh(yTicks, yValues)
+ df = pd.DataFrame(performance, index=yTicks)
+ ax = df.plot.barh()
+ ax.set_title(args.experiment)
+ ax.set_xlabel("Matthews-Correlation Coefficient (MCC)")
+ plt = ax.get_figure()
plt.tight_layout()
plt.savefig(args.output, format="png")
@@ -314,7 +326,7 @@
parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False)
parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False)
parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False)
- parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False)
+ parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results")
parser.add_argument('-o', '--output', help='Output (png)', required=True)
args = parser.parse_args()
main(args)
diff -r 06337927c198 -r f316caf098a6 spring_mcc.xml
--- a/spring_mcc.xml Sat Jan 23 14:42:46 2021 +0000
+++ b/spring_mcc.xml Mon Mar 01 15:02:36 2021 +0000
@@ -2,11 +2,13 @@
plot generator
matplotlib
+ pandas
+
@@ -50,19 +52,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -71,7 +60,6 @@
-
diff -r 06337927c198 -r f316caf098a6 spring_model_all.py
--- a/spring_model_all.py Sat Jan 23 14:42:46 2021 +0000
+++ b/spring_model_all.py Mon Mar 01 15:02:36 2021 +0000
@@ -34,7 +34,7 @@
mkdir("temp")
dbkit = DBKit(args.hhr_index, args.hhr_database)
logFile = open(args.log, "w")
- logFile.write("#namea\t nameb\t springscore\t tmscore\t energy\t clashes\n")
+ logFile.write("#namea\t nameb\t springscore\t tmscore\t energy\t clashes\t zscore\t templatea\t templateb\n")
with open(args.pairs, "r") as file:
for line in file:
param = line.split()
@@ -52,12 +52,17 @@
modelArgs.set(a_hhr=aFile, b_hhr=bFile, output=output)
modelData = createModel(modelArgs)
if modelData:
- infoStr = "%s\t %s\t %5.2f\t %5.2f\t %5.2f\t %5.2f\n" % (aIdentifier, bIdentifier,
- modelData["springscore"],
- modelData["tmscore"],
- modelData["energy"],
- modelData["clashes"])
+ infoStr = "%s\t %s\t %5.2f\t %5.2f\t %5.2f\t %5.2f\t %5.2f\t %s\t %s\n"
+ infoStr = infoStr % (aIdentifier, bIdentifier,
+ modelData["springscore"],
+ modelData["tmscore"],
+ modelData["energy"],
+ modelData["clashes"],
+ modelData["zscore"],
+ modelData["aTemplate"],
+ modelData["bTemplate"])
logFile.write(infoStr)
+ logFile.flush()
if isfile(aFile):
remove(aFile)
if isfile(bFile):
diff -r 06337927c198 -r f316caf098a6 spring_package/Modeller.py
--- a/spring_package/Modeller.py Sat Jan 23 14:42:46 2021 +0000
+++ b/spring_package/Modeller.py Mon Mar 01 15:02:36 2021 +0000
@@ -85,7 +85,7 @@
templateIndex = 0
for i in range(len(alignment)):
t = templateAlign[i]
- if alignment[i] == ":":
+ if alignment[i] in [":", "."]:
templateResidue = templateResidues[templateIndex]
templateResidue["alignedResidue"] = modelAlign[i]
aligned.append(templateResidue)
@@ -112,7 +112,7 @@
if templateHit["score"] < minScore or maxTries == 0:
break
maxTries = maxTries - 1
- yield templateHit["templatePair"]
+ yield templateHit["templatePair"], templateHit["score"]
def createModel(args):
@@ -139,7 +139,7 @@
maxInfo = None
minScore = float(args.minscore)
maxTries = int(args.maxtries)
- for [aTemplate, bTemplate] in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries):
+ for [aTemplate, bTemplate], zscore in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries):
print("Evaluating Complex Template: %s." % aTemplate)
templateFile = "temp/template.pdb"
createPDB(aTemplate, pdbDatabase, templateFile)
@@ -168,6 +168,7 @@
print(str(e))
continue
biomolFound = True
+ print(" zscore:\t%5.2f" % zscore)
tmscore = min(coreScore, partnerScore)
print(" tmscore:\t%5.2f" % tmscore)
energy = -interfaceEnergy.get(coreAligned, partnerAligned)
@@ -178,7 +179,7 @@
print(" springscore:\t%5.2f" % springscore)
if springscore > maxScore and clashes < args.maxclashes:
maxScore = springscore
- maxInfo = dict(springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes)
+ maxInfo = dict(aTemplate=aTemplate, bTemplate=bTemplate, springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes, zscore=zscore)
coreMolecule.save(outputName, chainName="0")
partnerMolecule.save(outputName, chainName="1", append=True)
if args.showtemplate == "true":
@@ -188,7 +189,7 @@
if maxInfo is not None:
print("Final Model:")
for key in maxInfo:
- print(" %s:\t%5.2f" % (key, maxInfo[key]))
+ print(" %s:\t%s" % (key, maxInfo[key]))
print("Completed.")
else:
print("Warning: Failed to determine model.")
diff -r 06337927c198 -r f316caf098a6 spring_package/__pycache__/Modeller.cpython-38.pyc
Binary file spring_package/__pycache__/Modeller.cpython-38.pyc has changed
diff -r 06337927c198 -r f316caf098a6 spring_package/__pycache__/Modeller.cpython-39.pyc
Binary file spring_package/__pycache__/Modeller.cpython-39.pyc has changed
diff -r 06337927c198 -r f316caf098a6 test-data/mcc/human_hv1h2.png
Binary file test-data/mcc/human_hv1h2.png has changed
diff -r 06337927c198 -r f316caf098a6 test-data/model/log.txt
--- a/test-data/model/log.txt Sat Jan 23 14:42:46 2021 +0000
+++ b/test-data/model/log.txt Mon Mar 01 15:02:36 2021 +0000
@@ -1,2 +1,2 @@
-#namea nameb springscore tmscore energy clashes
-sp|Q9BYF1|ACE2_HUMAN sp|P0DTC2|SPIKE_SARS2 0.91 0.82 -8.92 0.00
+#namea nameb springscore tmscore energy clashes zscore templatea templateb
+sp|Q9BYF1|ACE2_HUMAN sp|P0DTC2|SPIKE_SARS2 0.91 0.82 -8.92 0.00 406.00 6LZG_A 6LZG_B