comparison spring_roc.py @ 30:b0e195a47df7 draft

"planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
author guerler
date Tue, 24 Nov 2020 14:02:08 +0000
parents 41353488926c
children 3071750405c9
comparison
equal deleted inserted replaced
29:41353488926c 30:b0e195a47df7
1 #! /usr/bin/env python 1 #! /usr/bin/env python
2 import argparse 2 import argparse
3 import math 3 import math
4 import random 4 import random
5 from os.path import isfile
5 from datetime import datetime 6 from datetime import datetime
6 7
7 from matplotlib import pyplot as plt 8 from matplotlib import pyplot as plt
8 9
9 10
204 205
205 # process prediction file 206 # process prediction file
206 print("Loading prediction file...") 207 print("Loading prediction file...")
207 prediction, _ = getReference(args.input, scoreCol=2) 208 prediction, _ = getReference(args.input, scoreCol=2)
208 209
210 # get subcellular locations from UniProt export
211 locations = dict()
212 if isfile(args.locations):
213 with open(args.locations) as locFile:
214 for line in locFile:
215 searchKey = "SUBCELLULAR LOCATION"
216 searchPos = line.find(searchKey)
217 if searchPos != -1:
218 uniId = line.split()[0]
219 locStart = searchPos + len(searchKey) + 1
220 locId = line[locStart:].split()[0]
221 if locId in ["Nucleus", "Membrane", "Cytoplasm"]:
222 if uniId in filterA and uniId in filterB:
223 locations[uniId] = locId
224 print("Found %d subcellular locations." % (len(list(locations.keys()))))
225
209 # estimate background noise 226 # estimate background noise
210 print("Estimating background noise...") 227 print("Estimating background noise...")
211 negative = set() 228 negative = set()
212 filterAList = list(filterA) 229 filterAList = sorted(list(filterA))
213 filterBList = list(filterB) 230 filterBList = sorted(list(filterB))
214 negativeCount = positiveCount 231 negativeRequired = positiveCount
215 negativeRequired = negativeCount 232 random.seed(0)
216 random.seed(datetime.now()) 233 totalAttempts = int(len(filterAList) * len(filterBList) / 2)
217 while negativeRequired > 0: 234 while totalAttempts > 0:
235 totalAttempts = totalAttempts - 1
218 nameA = random.choice(filterAList) 236 nameA = random.choice(filterAList)
219 nameB = random.choice(filterBList) 237 nameB = random.choice(filterBList)
238 if locations:
239 if nameA not in locations or nameB not in locations:
240 continue
241 if locations[nameA] == locations[nameB]:
242 continue
220 key = getKey(nameA, nameB) 243 key = getKey(nameA, nameB)
221 if key not in putative and key not in negative: 244 if key not in putative and key not in negative:
222 negative.add(key) 245 negative.add(key)
223 negativeRequired = negativeRequired - 1 246 negativeRequired = negativeRequired - 1
247 if negativeRequired == 0:
248 break
224 249
225 # create plot 250 # create plot
226 print("Producing plot data...") 251 print("Producing plot data...")
227 print("Total count in prediction file: %d." % len(prediction)) 252 print("Total count in prediction file: %d." % len(prediction))
228 print("Total count in positive file: %d." % len(positive)) 253 print("Total count in positive file: %d." % len(positive))
239 plt.savefig(args.output, format="png") 264 plt.savefig(args.output, format="png")
240 265
241 266
242 if __name__ == "__main__": 267 if __name__ == "__main__":
243 parser = argparse.ArgumentParser(description='Create ROC plot.') 268 parser = argparse.ArgumentParser(description='Create ROC plot.')
244 parser.add_argument('-i', '--input', help='Input prediction file.', 269 parser.add_argument('-i', '--input', help='Input prediction file.', required=True)
245 required=True) 270 parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
246 parser.add_argument('-b', '--biogrid', help='BioGRID interaction ' + 271 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
247 'database file', required=True) 272 parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False)
248 parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', 273 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False)
249 default="", required=False) 274 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False)
250 parser.add_argument('-t', '--throughput', help='Throughput (low/high)',
251 default="", required=False)
252 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid',
253 default="", required=False)
254 parser.add_argument('-o', '--output', help='Output (png)', required=True) 275 parser.add_argument('-o', '--output', help='Output (png)', required=True)
255 args = parser.parse_args() 276 args = parser.parse_args()
256 main(args) 277 main(args)