changeset 30:b0e195a47df7 draft

"planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
author guerler
date Tue, 24 Nov 2020 14:02:08 +0000
parents 41353488926c
children 3071750405c9
files out.png spring_minz.xml spring_roc.py spring_roc.xml test-data/roc/human_hv1h2.png
diffstat 5 files changed, 41 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
Binary file out.png has changed
--- a/spring_minz.xml	Sun Nov 22 14:15:24 2020 +0000
+++ b/spring_minz.xml	Tue Nov 24 14:02:08 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="spring_minz" name="SPRING min-Z" version="0.1.2" python_template_version="3.5">
+<tool id="spring_minz" name="SPRING min-Z" version="0.1.2" python_template_version="3.5" license="MIT">
     <description>filter operation</description>
     <command detect_errors="exit_code"><![CDATA[
         mkdir -p targets &&
--- a/spring_roc.py	Sun Nov 22 14:15:24 2020 +0000
+++ b/spring_roc.py	Tue Nov 24 14:02:08 2020 +0000
@@ -2,6 +2,7 @@
 import argparse
 import math
 import random
+from os.path import isfile
 from datetime import datetime
 
 from matplotlib import pyplot as plt
@@ -206,21 +207,45 @@
     print("Loading prediction file...")
     prediction, _ = getReference(args.input, scoreCol=2)
 
+    # get subcellular locations from UniProt export
+    locations = dict()
+    if isfile(args.locations):
+        with open(args.locations) as locFile:
+            for line in locFile:
+                searchKey = "SUBCELLULAR LOCATION"
+                searchPos = line.find(searchKey)
+                if searchPos != -1:
+                    uniId = line.split()[0]
+                    locStart = searchPos + len(searchKey) + 1
+                    locId = line[locStart:].split()[0]
+                    if locId in ["Nucleus", "Membrane", "Cytoplasm"]:
+                        if uniId in filterA and uniId in filterB:
+                            locations[uniId] = locId
+        print("Found %d subcellular locations." % (len(list(locations.keys()))))
+
     # estimate background noise
     print("Estimating background noise...")
     negative = set()
-    filterAList = list(filterA)
-    filterBList = list(filterB)
-    negativeCount = positiveCount
-    negativeRequired = negativeCount
-    random.seed(datetime.now())
-    while negativeRequired > 0:
+    filterAList = sorted(list(filterA))
+    filterBList = sorted(list(filterB))
+    negativeRequired = positiveCount
+    random.seed(0)
+    totalAttempts = int(len(filterAList) * len(filterBList) / 2)
+    while totalAttempts > 0:
+        totalAttempts = totalAttempts - 1
         nameA = random.choice(filterAList)
         nameB = random.choice(filterBList)
+        if locations:
+            if nameA not in locations or nameB not in locations:
+                continue
+            if locations[nameA] == locations[nameB]:
+                continue
         key = getKey(nameA, nameB)
         if key not in putative and key not in negative:
             negative.add(key)
             negativeRequired = negativeRequired - 1
+            if negativeRequired == 0:
+                break
 
     # create plot
     print("Producing plot data...")
@@ -241,16 +266,12 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Create ROC plot.')
-    parser.add_argument('-i', '--input', help='Input prediction file.',
-                        required=True)
-    parser.add_argument('-b', '--biogrid', help='BioGRID interaction ' +
-                        'database file', required=True)
-    parser.add_argument('-e', '--experiment', help='Type (physical/genetic)',
-                        default="", required=False)
-    parser.add_argument('-t', '--throughput', help='Throughput (low/high)',
-                        default="", required=False)
-    parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid',
-                        default="", required=False)
+    parser.add_argument('-i', '--input', help='Input prediction file.', required=True)
+    parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
+    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', default="", required=False)
+    parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False)
+    parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False)
+    parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False)
     parser.add_argument('-o', '--output', help='Output (png)', required=True)
     args = parser.parse_args()
     main(args)
--- a/spring_roc.xml	Sun Nov 22 14:15:24 2020 +0000
+++ b/spring_roc.xml	Tue Nov 24 14:02:08 2020 +0000
@@ -1,14 +1,15 @@
-<tool id="spring_roc" name="SPRING ROC" version="0.1.0" python_template_version="3.5">
+<tool id="spring_roc" name="SPRING ROC" version="0.1.0" python_template_version="3.5" license="MIT">
     <description>plot generator</description>
     <requirements>
         <requirement type="package" version="3.3.3">matplotlib</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        python3 '$__tool_directory__/spring_roc.py' -i '$input' -b '$database' -e '$experiment.type' -m '$experiment.method' -t '$throughput' -o '$rocplot'
+        python3 '$__tool_directory__/spring_roc.py' -i '$input' -b '$database' -e '$experiment.type' -m '$experiment.method' -t '$throughput' -l '$locations' -o '$rocplot'
     ]]></command>
     <inputs>
         <param format="tabular" name="input" type="data" label="Interactions" help="Prediction Input Table with 2-columns containing UniProt Accession codes."/>
         <param format="tabular" name="database" type="data" label="BioGRID Database" help="BioGRID Database in TAB 3.0 format."/>
+        <param format="tabular" name="locations" type="data" label="UniProt Localization" help="UniProt tabular export with localization column to sample non-interacting pairs." optional="True" />
         <conditional name="experiment">
             <param name="type" type="select" label="Experimental Type" display="radio" help="Choose a specific experimental system type.">
                 <option value="">Any</option>
@@ -58,10 +59,6 @@
         <test>
             <param format="tabular" name="input" value="roc/human_hv1h2.txt" />
             <param format="tabular" name="database" value="roc/biogrid_fret.txt" />
-            <conditional name="experiment">
-                <param name="type" value="physical" />
-                <param name="method" value="Two-hybrid" />
-            </conditional>
             <output name="output" file="roc/human_hv1h2.png" />
         </test>
     </tests>
Binary file test-data/roc/human_hv1h2.png has changed