Mercurial > repos > guerler > springsuite
comparison spring_roc.py @ 30:b0e195a47df7 draft
"planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
field | value |
---|---|
author | guerler |
date | Tue, 24 Nov 2020 14:02:08 +0000 |
parents | 41353488926c |
children | 3071750405c9 |
comparison legend: equal | deleted | inserted | replaced
29:41353488926c | 30:b0e195a47df7 |
---|---|
1 #! /usr/bin/env python | 1 #! /usr/bin/env python |
2 import argparse | 2 import argparse |
3 import math | 3 import math |
4 import random | 4 import random |
5 from os.path import isfile | |
5 from datetime import datetime | 6 from datetime import datetime |
6 | 7 |
7 from matplotlib import pyplot as plt | 8 from matplotlib import pyplot as plt |
8 | 9 |
9 | 10 |
204 | 205 |
205 # process prediction file | 206 # process prediction file |
206 print("Loading prediction file...") | 207 print("Loading prediction file...") |
207 prediction, _ = getReference(args.input, scoreCol=2) | 208 prediction, _ = getReference(args.input, scoreCol=2) |
208 | 209 |
210 # get subcellular locations from UniProt export | |
211 locations = dict() | |
212 if isfile(args.locations): | |
213 with open(args.locations) as locFile: | |
214 for line in locFile: | |
215 searchKey = "SUBCELLULAR LOCATION" | |
216 searchPos = line.find(searchKey) | |
217 if searchPos != -1: | |
218 uniId = line.split()[0] | |
219 locStart = searchPos + len(searchKey) + 1 | |
220 locId = line[locStart:].split()[0] | |
221 if locId in ["Nucleus", "Membrane", "Cytoplasm"]: | |
222 if uniId in filterA and uniId in filterB: | |
223 locations[uniId] = locId | |
224 print("Found %d subcellular locations." % (len(list(locations.keys())))) | |
225 | |
209 # estimate background noise | 226 # estimate background noise |
210 print("Estimating background noise...") | 227 print("Estimating background noise...") |
211 negative = set() | 228 negative = set() |
212 filterAList = list(filterA) | 229 filterAList = sorted(list(filterA)) |
213 filterBList = list(filterB) | 230 filterBList = sorted(list(filterB)) |
214 negativeCount = positiveCount | 231 negativeRequired = positiveCount |
215 negativeRequired = negativeCount | 232 random.seed(0) |
216 random.seed(datetime.now()) | 233 totalAttempts = int(len(filterAList) * len(filterBList) / 2) |
217 while negativeRequired > 0: | 234 while totalAttempts > 0: |
235 totalAttempts = totalAttempts - 1 | |
218 nameA = random.choice(filterAList) | 236 nameA = random.choice(filterAList) |
219 nameB = random.choice(filterBList) | 237 nameB = random.choice(filterBList) |
238 if locations: | |
239 if nameA not in locations or nameB not in locations: | |
240 continue | |
241 if locations[nameA] == locations[nameB]: | |
242 continue | |
220 key = getKey(nameA, nameB) | 243 key = getKey(nameA, nameB) |
221 if key not in putative and key not in negative: | 244 if key not in putative and key not in negative: |
222 negative.add(key) | 245 negative.add(key) |
223 negativeRequired = negativeRequired - 1 | 246 negativeRequired = negativeRequired - 1 |
247 if negativeRequired == 0: | |
248 break | |
224 | 249 |
225 # create plot | 250 # create plot |
226 print("Producing plot data...") | 251 print("Producing plot data...") |
227 print("Total count in prediction file: %d." % len(prediction)) | 252 print("Total count in prediction file: %d." % len(prediction)) |
228 print("Total count in positive file: %d." % len(positive)) | 253 print("Total count in positive file: %d." % len(positive)) |
239 plt.savefig(args.output, format="png") | 264 plt.savefig(args.output, format="png") |
240 | 265 |
241 | 266 |
242 if __name__ == "__main__": | 267 if __name__ == "__main__": |
243 parser = argparse.ArgumentParser(description='Create ROC plot.') | 268 parser = argparse.ArgumentParser(description='Create ROC plot.') |
244 parser.add_argument('-i', '--input', help='Input prediction file.', | 269 parser.add_argument('-i', '--input', help='Input prediction file.', required=True) |
245 required=True) | 270 parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True) |
246 parser.add_argument('-b', '--biogrid', help='BioGRID interaction ' + | 271 parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) |
247 'database file', required=True) | 272 parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False) |
248 parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', | 273 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False) |
249 default="", required=False) | 274 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False) |
250 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', | |
251 default="", required=False) | |
252 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', | |
253 default="", required=False) | |
254 parser.add_argument('-o', '--output', help='Output (png)', required=True) | 275 parser.add_argument('-o', '--output', help='Output (png)', required=True) |
255 args = parser.parse_args() | 276 args = parser.parse_args() |
256 main(args) | 277 main(args) |