comparison spring_mcc.py @ 41:f316caf098a6 draft default tip

"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
author guerler
date Mon, 01 Mar 2021 15:02:36 +0000
parents 172398348efd
children
comparison
equal deleted inserted replaced
40:06337927c198 41:f316caf098a6
1 #! /usr/bin/env python 1 #! /usr/bin/env python
2 import argparse 2 import argparse
3 import math 3 import math
4 import pandas as pd
4 from os.path import isfile 5 from os.path import isfile
5 import re 6 import re
6 from matplotlib import pyplot as plt 7
8 METHODS = ["Biochemical Activity",
9 "Co-fractionation",
10 "Co-localization",
11 "Far Western",
12 "FRET",
13 "PCA",
14 "Co-crystal Structure",
15 "Co-purification",
16 "Two-hybrid",
17 "Affinity Capture-MS"]
7 18
8 19
9 def getIds(rawIds): 20 def getIds(rawIds):
10 return rawIds.split("|") 21 return rawIds.split("|")
11 22
211 elif (regionA not in locId and regionB in locId): 222 elif (regionA not in locId and regionB in locId):
212 locations[regionB].append(uniId) 223 locations[regionB].append(uniId)
213 filterAList = sorted(locations[regionA]) 224 filterAList = sorted(locations[regionA])
214 filterBList = sorted(locations[regionB]) 225 filterBList = sorted(locations[regionB])
215 else: 226 else:
216 filterAList = list(filterA) 227 filterAList = sorted(filterA)
217 filterBList = list(filterB) 228 filterBList = sorted(filterB)
218 for i, j in randomPairs(len(filterAList), len(filterBList), jSize): 229 for i, j in randomPairs(len(filterAList), len(filterBList), jSize):
219 nameA = filterAList[i] 230 nameA = filterAList[i]
220 nameB = filterBList[j] 231 nameB = filterBList[j]
221 key = getKey(nameA, nameB) 232 key = getKey(nameA, nameB)
222 if key not in negative: 233 if key not in negative:
249 filterB = filterSets[filterKeys[1]] 260 filterB = filterSets[filterKeys[1]]
250 else: 261 else:
251 filterB = filterA 262 filterB = filterA
252 263
253 # identify biogrid filter options 264 # identify biogrid filter options
254 filterValues = list() 265 performance = dict()
255 filterValues.append([11, args.method]) 266 for methodReference in METHODS:
256 267
257 # process biogrid database 268 # process biogrid database
258 print("Loading positive set from BioGRID file...") 269 print("Loading positive set from BioGRID file (%s)..." % methodReference)
259 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, 270 filterValues = [[11, methodReference]]
260 separator="\t", filterA=filterA, 271 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26,
261 filterB=filterB, skipFirstLine=True, 272 separator="\t", filterA=filterA,
262 filterValues=filterValues) 273 filterB=filterB, skipFirstLine=True,
263 274 filterValues=filterValues)
264 # estimate negative set 275
265 negative = getNegativeSet(args, filterA, filterB, positiveCount) 276 # estimate negative set
266 277 negative = getNegativeSet(args, filterA, filterB, positiveCount)
267 # get prediction results 278
268 print("Loading prediction file...") 279 # evaluate other methods
269 prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8) 280 yValues = list()
270 mcc = getMCC(prediction, positive, positiveCount, negative) 281 for method in METHODS:
271 yValues = [mcc] 282 if methodReference != method:
272 yTicks = ["SPRING"] 283 print("Method: %s" % method)
273 284 filterValues = [[11, method]]
274 # identify biogrid filter options 285 prediction, _ = getReference(args.biogrid, aCol=23, bCol=26,
275 for method in ["Affinity Capture-MS", 286 separator="\t", filterA=filterA,
276 "Biochemical Activity", 287 filterB=filterB, skipFirstLine=True,
277 "Co-crystal Structure", 288 filterValues=filterValues)
278 "Co-fractionation", 289 mcc = getMCC(prediction, positive, positiveCount, negative)
279 "Co-localization", 290 yValues.append(mcc)
280 "Co-purification", 291 else:
281 "Far Western", 292 yValues.append(0.0)
282 "FRET", 293
283 "PCA", 294 # add results to performance dictionary
284 "Reconstituted Complex", 295 performance[methodReference] = yValues
285 "Two-hybrid"]: 296
286 if args.method != method: 297 # get and append prediction results
287 print("Method: %s" % method) 298 print("Loading prediction file...")
288 filterValues = [[11, method]] 299 prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0)
289 prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, 300 mcc = getMCC(prediction, positive, positiveCount, negative)
290 separator="\t", filterA=filterA, 301 performance[methodReference].append(mcc)
291 filterB=filterB, skipFirstLine=True, 302
292 filterValues=filterValues) 303 # build yTicks
293 mcc = getMCC(prediction, positive, positiveCount, negative) 304 yTicks = METHODS[:]
294 yValues.append(mcc) 305 yTicks.append("SPRING")
295 yTicks.append(method)
296 306
297 # create plot 307 # create plot
298 print("Producing plot data...") 308 print("Producing plot data...")
299 print("Total count in prediction file: %d." % len(prediction)) 309 print("Total count in prediction file: %d." % len(prediction))
300 print("Total count in positive file: %d." % len(positive)) 310 print("Total count in positive file: %d." % len(positive))
301 plt.xlabel("Matthews-Correlation Coefficient (MCC)") 311 df = pd.DataFrame(performance, index=yTicks)
302 plt.title("Positive set: %s" % args.method) 312 ax = df.plot.barh()
303 plt.barh(yTicks, yValues) 313 ax.set_title(args.experiment)
314 ax.set_xlabel("Matthews-Correlation Coefficient (MCC)")
315 plt = ax.get_figure()
304 plt.tight_layout() 316 plt.tight_layout()
305 plt.savefig(args.output, format="png") 317 plt.savefig(args.output, format="png")
306 318
307 319
308 if __name__ == "__main__": 320 if __name__ == "__main__":
312 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) 324 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
313 parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False) 325 parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False)
314 parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) 326 parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False)
315 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) 327 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False)
316 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) 328 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False)
317 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False) 329 parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results")
318 parser.add_argument('-o', '--output', help='Output (png)', required=True) 330 parser.add_argument('-o', '--output', help='Output (png)', required=True)
319 args = parser.parse_args() 331 args = parser.parse_args()
320 main(args) 332 main(args)