Mercurial > repos > guerler > springsuite
diff spring_roc.py @ 30:b0e195a47df7 draft
"planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
author | guerler |
---|---|
date | Tue, 24 Nov 2020 14:02:08 +0000 |
parents | 41353488926c |
children | 3071750405c9 |
line wrap: on
line diff
--- a/spring_roc.py Sun Nov 22 14:15:24 2020 +0000
+++ b/spring_roc.py Tue Nov 24 14:02:08 2020 +0000
@@ -2,6 +2,7 @@
 import argparse
 import math
 import random
+from os.path import isfile
 from datetime import datetime

 from matplotlib import pyplot as plt
@@ -206,21 +207,45 @@
     print("Loading prediction file...")
     prediction, _ = getReference(args.input, scoreCol=2)

+    # get subcellular locations from UniProt export
+    locations = dict()
+    if isfile(args.locations):
+        with open(args.locations) as locFile:
+            for line in locFile:
+                searchKey = "SUBCELLULAR LOCATION"
+                searchPos = line.find(searchKey)
+                if searchPos != -1:
+                    uniId = line.split()[0]
+                    locStart = searchPos + len(searchKey) + 1
+                    locId = line[locStart:].split()[0]
+                    if locId in ["Nucleus", "Membrane", "Cytoplasm"]:
+                        if uniId in filterA and uniId in filterB:
+                            locations[uniId] = locId
+        print("Found %d subcellular locations." % (len(list(locations.keys()))))
+
     # estimate background noise
     print("Estimating background noise...")
     negative = set()
-    filterAList = list(filterA)
-    filterBList = list(filterB)
-    negativeCount = positiveCount
-    negativeRequired = negativeCount
-    random.seed(datetime.now())
-    while negativeRequired > 0:
+    filterAList = sorted(list(filterA))
+    filterBList = sorted(list(filterB))
+    negativeRequired = positiveCount
+    random.seed(0)
+    totalAttempts = int(len(filterAList) * len(filterBList) / 2)
+    while totalAttempts > 0:
+        totalAttempts = totalAttempts - 1
         nameA = random.choice(filterAList)
         nameB = random.choice(filterBList)
+        if locations:
+            if nameA not in locations or nameB not in locations:
+                continue
+            if locations[nameA] == locations[nameB]:
+                continue
         key = getKey(nameA, nameB)
         if key not in putative and key not in negative:
             negative.add(key)
             negativeRequired = negativeRequired - 1
+            if negativeRequired == 0:
+                break

     # create plot
     print("Producing plot data...")
@@ -241,16 +266,12 @@

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Create ROC plot.')
-    parser.add_argument('-i', '--input', help='Input prediction file.',
-                        required=True)
-    parser.add_argument('-b', '--biogrid', help='BioGRID interaction ' +
-                        'database file', required=True)
-    parser.add_argument('-e', '--experiment', help='Type (physical/genetic)',
-                        default="", required=False)
-    parser.add_argument('-t', '--throughput', help='Throughput (low/high)',
-                        default="", required=False)
-    parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid',
-                        default="", required=False)
+    parser.add_argument('-i', '--input', help='Input prediction file.', required=True)
+    parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
+    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
+    parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False)
+    parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False)
+    parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False)
     parser.add_argument('-o', '--output', help='Output (png)', required=True)
     args = parser.parse_args()
     main(args)