Mercurial > repos > guerler > springsuite
comparison spring_mcc.py @ 41:f316caf098a6 draft default tip
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
| author | guerler |
|---|---|
| date | Mon, 01 Mar 2021 15:02:36 +0000 |
| parents | 172398348efd |
| children |
comparison
equal
deleted
inserted
replaced
| 40:06337927c198 | 41:f316caf098a6 |
|---|---|
| 1 #! /usr/bin/env python | 1 #! /usr/bin/env python |
| 2 import argparse | 2 import argparse |
| 3 import math | 3 import math |
| 4 import pandas as pd | |
| 4 from os.path import isfile | 5 from os.path import isfile |
| 5 import re | 6 import re |
| 6 from matplotlib import pyplot as plt | 7 |
| 8 METHODS = ["Biochemical Activity", | |
| 9 "Co-fractionation", | |
| 10 "Co-localization", | |
| 11 "Far Western", | |
| 12 "FRET", | |
| 13 "PCA", | |
| 14 "Co-crystal Structure", | |
| 15 "Co-purification", | |
| 16 "Two-hybrid", | |
| 17 "Affinity Capture-MS"] | |
| 7 | 18 |
| 8 | 19 |
| 9 def getIds(rawIds): | 20 def getIds(rawIds): |
| 10 return rawIds.split("|") | 21 return rawIds.split("|") |
| 11 | 22 |
| 211 elif (regionA not in locId and regionB in locId): | 222 elif (regionA not in locId and regionB in locId): |
| 212 locations[regionB].append(uniId) | 223 locations[regionB].append(uniId) |
| 213 filterAList = sorted(locations[regionA]) | 224 filterAList = sorted(locations[regionA]) |
| 214 filterBList = sorted(locations[regionB]) | 225 filterBList = sorted(locations[regionB]) |
| 215 else: | 226 else: |
| 216 filterAList = list(filterA) | 227 filterAList = sorted(filterA) |
| 217 filterBList = list(filterB) | 228 filterBList = sorted(filterB) |
| 218 for i, j in randomPairs(len(filterAList), len(filterBList), jSize): | 229 for i, j in randomPairs(len(filterAList), len(filterBList), jSize): |
| 219 nameA = filterAList[i] | 230 nameA = filterAList[i] |
| 220 nameB = filterBList[j] | 231 nameB = filterBList[j] |
| 221 key = getKey(nameA, nameB) | 232 key = getKey(nameA, nameB) |
| 222 if key not in negative: | 233 if key not in negative: |
| 249 filterB = filterSets[filterKeys[1]] | 260 filterB = filterSets[filterKeys[1]] |
| 250 else: | 261 else: |
| 251 filterB = filterA | 262 filterB = filterA |
| 252 | 263 |
| 253 # identify biogrid filter options | 264 # identify biogrid filter options |
| 254 filterValues = list() | 265 performance = dict() |
| 255 filterValues.append([11, args.method]) | 266 for methodReference in METHODS: |
| 256 | 267 |
| 257 # process biogrid database | 268 # process biogrid database |
| 258 print("Loading positive set from BioGRID file...") | 269 print("Loading positive set from BioGRID file (%s)..." % methodReference) |
| 259 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, | 270 filterValues = [[11, methodReference]] |
| 260 separator="\t", filterA=filterA, | 271 positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, |
| 261 filterB=filterB, skipFirstLine=True, | 272 separator="\t", filterA=filterA, |
| 262 filterValues=filterValues) | 273 filterB=filterB, skipFirstLine=True, |
| 263 | 274 filterValues=filterValues) |
| 264 # estimate negative set | 275 |
| 265 negative = getNegativeSet(args, filterA, filterB, positiveCount) | 276 # estimate negative set |
| 266 | 277 negative = getNegativeSet(args, filterA, filterB, positiveCount) |
| 267 # get prediction results | 278 |
| 268 print("Loading prediction file...") | 279 # evaluate other methods |
| 269 prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8) | 280 yValues = list() |
| 270 mcc = getMCC(prediction, positive, positiveCount, negative) | 281 for method in METHODS: |
| 271 yValues = [mcc] | 282 if methodReference != method: |
| 272 yTicks = ["SPRING"] | 283 print("Method: %s" % method) |
| 273 | 284 filterValues = [[11, method]] |
| 274 # identify biogrid filter options | 285 prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, |
| 275 for method in ["Affinity Capture-MS", | 286 separator="\t", filterA=filterA, |
| 276 "Biochemical Activity", | 287 filterB=filterB, skipFirstLine=True, |
| 277 "Co-crystal Structure", | 288 filterValues=filterValues) |
| 278 "Co-fractionation", | 289 mcc = getMCC(prediction, positive, positiveCount, negative) |
| 279 "Co-localization", | 290 yValues.append(mcc) |
| 280 "Co-purification", | 291 else: |
| 281 "Far Western", | 292 yValues.append(0.0) |
| 282 "FRET", | 293 |
| 283 "PCA", | 294 # add results to performance dication |
| 284 "Reconstituted Complex", | 295 performance[methodReference] = yValues |
| 285 "Two-hybrid"]: | 296 |
| 286 if args.method != method: | 297 # get and append prediction results |
| 287 print("Method: %s" % method) | 298 print("Loading prediction file...") |
| 288 filterValues = [[11, method]] | 299 prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0) |
| 289 prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, | 300 mcc = getMCC(prediction, positive, positiveCount, negative) |
| 290 separator="\t", filterA=filterA, | 301 performance[methodReference].append(mcc) |
| 291 filterB=filterB, skipFirstLine=True, | 302 |
| 292 filterValues=filterValues) | 303 # build yTicks |
| 293 mcc = getMCC(prediction, positive, positiveCount, negative) | 304 yTicks = METHODS[:] |
| 294 yValues.append(mcc) | 305 yTicks.append("SPRING") |
| 295 yTicks.append(method) | |
| 296 | 306 |
| 297 # create plot | 307 # create plot |
| 298 print("Producing plot data...") | 308 print("Producing plot data...") |
| 299 print("Total count in prediction file: %d." % len(prediction)) | 309 print("Total count in prediction file: %d." % len(prediction)) |
| 300 print("Total count in positive file: %d." % len(positive)) | 310 print("Total count in positive file: %d." % len(positive)) |
| 301 plt.xlabel("Matthews-Correlation Coefficient (MCC)") | 311 df = pd.DataFrame(performance, index=yTicks) |
| 302 plt.title("Positive set: %s" % args.method) | 312 ax = df.plot.barh() |
| 303 plt.barh(yTicks, yValues) | 313 ax.set_title(args.experiment) |
| 314 ax.set_xlabel("Matthews-Correlation Coefficient (MCC)") | |
| 315 plt = ax.get_figure() | |
| 304 plt.tight_layout() | 316 plt.tight_layout() |
| 305 plt.savefig(args.output, format="png") | 317 plt.savefig(args.output, format="png") |
| 306 | 318 |
| 307 | 319 |
| 308 if __name__ == "__main__": | 320 if __name__ == "__main__": |
| 312 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) | 324 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) |
| 313 parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False) | 325 parser.add_argument('-ra', '--region_a', help='First subcellular location', required=False) |
| 314 parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) | 326 parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) |
| 315 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) | 327 parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) |
| 316 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) | 328 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) |
| 317 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False) | 329 parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results") |
| 318 parser.add_argument('-o', '--output', help='Output (png)', required=True) | 330 parser.add_argument('-o', '--output', help='Output (png)', required=True) |
| 319 args = parser.parse_args() | 331 args = parser.parse_args() |
| 320 main(args) | 332 main(args) |
