# HG changeset patch
# User guerler
# Date 1606240507 0
# Node ID b300ddbbf9d086af92a3fe42ab39084dff843db1
# Parent f115fbf3ac6355f6bea54ea28d9f06b8570adfd2
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
diff -r f115fbf3ac63 -r b300ddbbf9d0 spring_roc.py
--- a/spring_roc.py Tue Nov 24 17:27:22 2020 +0000
+++ b/spring_roc.py Tue Nov 24 17:55:07 2020 +0000
@@ -3,7 +3,6 @@
import math
import random
from os.path import isfile
-from datetime import datetime
from matplotlib import pyplot as plt
@@ -207,50 +206,62 @@
print("Loading prediction file...")
prediction, _ = getReference(args.input, scoreCol=2)
- # get subcellular locations from UniProt export
- locations = dict()
- if isfile(args.locations):
- regions = list()
- if args.regions:
- regions = args.regions.split(",")
- with open(args.locations) as locFile:
- for line in locFile:
- searchKey = "SUBCELLULAR LOCATION"
- searchPos = line.find(searchKey)
- if searchPos != -1:
- uniId = line.split()[0]
- locStart = searchPos + len(searchKey) + 1
- locId = line[locStart:].split()[0]
- if regions:
- if locId not in regions:
- continue
- if uniId in filterA or uniId in filterB:
- locations[uniId] = locId
- print("Found %d subcellular locations." % (len(list(locations.keys()))))
+ # determine negative set
+ print("Identifying non-interacting pairs...")
+ negative = set()
+ if isfile(args.negative):
+ # load from explicit file
+ with open(args.negative) as file:
+ for line in file:
+ cols = line.split()
+ nameA = cols[0]
+ nameB = cols[1]
+ key = getKey(nameA, nameB)
+ if key not in putative and key not in negative:
+ negative.add(key)
+ else:
+ # get subcellular locations from UniProt export
+ locations = dict()
+ if isfile(args.locations):
+ regions = list()
+ if args.regions:
+ regions = args.regions.split(",")
+ with open(args.locations) as locFile:
+ for line in locFile:
+ searchKey = "SUBCELLULAR LOCATION"
+ searchPos = line.find(searchKey)
+ if searchPos != -1:
+ uniId = line.split()[0]
+ locStart = searchPos + len(searchKey) + 1
+ locId = line[locStart:].split()[0]
+ if regions:
+ if locId not in regions:
+ continue
+ if uniId in filterA or uniId in filterB:
+ locations[uniId] = locId
+ print("Found %d subcellular locations." % (len(list(locations.keys()))))
- # estimate background noise
- print("Estimating background noise...")
- negative = set()
- filterAList = sorted(list(filterA))
- filterBList = sorted(list(filterB))
- negativeRequired = positiveCount
- random.seed(0)
- totalAttempts = int(len(filterAList) * len(filterBList) / 2)
- while totalAttempts > 0:
- totalAttempts = totalAttempts - 1
- nameA = random.choice(filterAList)
- nameB = random.choice(filterBList)
- if locations:
- if nameA not in locations or nameB not in locations:
- continue
- if locations[nameA] == locations[nameB]:
- continue
- key = getKey(nameA, nameB)
- if key not in putative and key not in negative:
- negative.add(key)
- negativeRequired = negativeRequired - 1
- if negativeRequired == 0:
- break
+ # randomly sample non-interacting pairs
+ filterAList = sorted(list(filterA))
+ filterBList = sorted(list(filterB))
+ negativeRequired = positiveCount
+ random.seed(0)
+ totalAttempts = int(len(filterAList) * len(filterBList) / 2)
+ while totalAttempts > 0:
+ totalAttempts = totalAttempts - 1
+ nameA = random.choice(filterAList)
+ nameB = random.choice(filterBList)
+ if locations:
+ if nameA not in locations or nameB not in locations:
+ continue
+ if locations[nameA] == locations[nameB]:
+ continue
+ key = getKey(nameA, nameB)
+ if key not in putative and key not in negative:
+ negative.add(key)
+ negativeRequired = negativeRequired - 1
+ if negativeRequired == 0:
+ break
# create plot
print("Producing plot data...")
@@ -271,10 +282,11 @@
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Create ROC plot.')
- parser.add_argument('-i', '--input', help='Input prediction file.', required=True)
+ parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True)
parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
- parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
+ parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', default="", required=False)
parser.add_argument('-r', '--regions', help='Comma-separated regions', required=False)
+ parser.add_argument('-n', '--negative', help='Negative set (2-columns)', default="", required=False)
parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False)
parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False)
parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False)
diff -r f115fbf3ac63 -r b300ddbbf9d0 spring_roc.xml
--- a/spring_roc.xml Tue Nov 24 17:27:22 2020 +0000
+++ b/spring_roc.xml Tue Nov 24 17:55:07 2020 +0000
@@ -4,7 +4,7 @@
matplotlib
@@ -13,9 +13,11 @@
+
-
+
+
@@ -24,9 +26,15 @@
+
+
+
+
+
+