Mercurial > repos > guerler > springsuite
changeset 30:b0e195a47df7 draft
"planemo upload commit b306c715d8284c097220bc5c8461399fdc05eac7"
author | guerler |
---|---|
date | Tue, 24 Nov 2020 14:02:08 +0000 |
parents | 41353488926c |
children | 3071750405c9 |
files | out.png spring_minz.xml spring_roc.py spring_roc.xml test-data/roc/human_hv1h2.png |
diffstat | 5 files changed, 41 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/spring_minz.xml Sun Nov 22 14:15:24 2020 +0000 +++ b/spring_minz.xml Tue Nov 24 14:02:08 2020 +0000 @@ -1,4 +1,4 @@ -<tool id="spring_minz" name="SPRING min-Z" version="0.1.2" python_template_version="3.5"> +<tool id="spring_minz" name="SPRING min-Z" version="0.1.2" python_template_version="3.5" license="MIT"> <description>filter operation</description> <command detect_errors="exit_code"><![CDATA[ mkdir -p targets &&
--- a/spring_roc.py Sun Nov 22 14:15:24 2020 +0000 +++ b/spring_roc.py Tue Nov 24 14:02:08 2020 +0000 @@ -2,6 +2,7 @@ import argparse import math import random +from os.path import isfile from datetime import datetime from matplotlib import pyplot as plt @@ -206,21 +207,45 @@ print("Loading prediction file...") prediction, _ = getReference(args.input, scoreCol=2) + # get subcellular locations from UniProt export + locations = dict() + if isfile(args.locations): + with open(args.locations) as locFile: + for line in locFile: + searchKey = "SUBCELLULAR LOCATION" + searchPos = line.find(searchKey) + if searchPos != -1: + uniId = line.split()[0] + locStart = searchPos + len(searchKey) + 1 + locId = line[locStart:].split()[0] + if locId in ["Nucleus", "Membrane", "Cytoplasm"]: + if uniId in filterA and uniId in filterB: + locations[uniId] = locId + print("Found %d subcellular locations." % (len(list(locations.keys())))) + # estimate background noise print("Estimating background noise...") negative = set() - filterAList = list(filterA) - filterBList = list(filterB) - negativeCount = positiveCount - negativeRequired = negativeCount - random.seed(datetime.now()) - while negativeRequired > 0: + filterAList = sorted(list(filterA)) + filterBList = sorted(list(filterB)) + negativeRequired = positiveCount + random.seed(0) + totalAttempts = int(len(filterAList) * len(filterBList) / 2) + while totalAttempts > 0: + totalAttempts = totalAttempts - 1 nameA = random.choice(filterAList) nameB = random.choice(filterBList) + if locations: + if nameA not in locations or nameB not in locations: + continue + if locations[nameA] == locations[nameB]: + continue key = getKey(nameA, nameB) if key not in putative and key not in negative: negative.add(key) negativeRequired = negativeRequired - 1 + if negativeRequired == 0: + break # create plot print("Producing plot data...") @@ -241,16 +266,12 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description='Create ROC plot.') - parser.add_argument('-i', '--input', help='Input prediction file.', - required=True) - parser.add_argument('-b', '--biogrid', help='BioGRID interaction ' + - 'database file', required=True) - parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', - default="", required=False) - parser.add_argument('-t', '--throughput', help='Throughput (low/high)', - default="", required=False) - parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', - default="", required=False) + parser.add_argument('-i', '--input', help='Input prediction file.', required=True) + parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True) + parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) + parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False) + parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False) + parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False) parser.add_argument('-o', '--output', help='Output (png)', required=True) args = parser.parse_args() main(args)
--- a/spring_roc.xml Sun Nov 22 14:15:24 2020 +0000 +++ b/spring_roc.xml Tue Nov 24 14:02:08 2020 +0000 @@ -1,14 +1,15 @@ -<tool id="spring_roc" name="SPRING ROC" version="0.1.0" python_template_version="3.5"> +<tool id="spring_roc" name="SPRING ROC" version="0.1.0" python_template_version="3.5" license="MIT"> <description>plot generator</description> <requirements> <requirement type="package" version="3.3.3">matplotlib</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - python3 '$__tool_directory__/spring_roc.py' -i '$input' -b '$database' -e '$experiment.type' -m '$experiment.method' -t '$throughput' -o '$rocplot' + python3 '$__tool_directory__/spring_roc.py' -i '$input' -b '$database' -e '$experiment.type' -m '$experiment.method' -t '$throughput' -l '$locations' -o '$rocplot' ]]></command> <inputs> <param format="tabular" name="input" type="data" label="Interactions" help="Prediction Input Table with 2-columns containing UniProt Accession codes."/> <param format="tabular" name="database" type="data" label="BioGRID Database" help="BioGRID Database in TAB 3.0 format."/> + <param format="tabular" name="locations" type="data" label="UniProt Localization" help="UniProt tabular export with localization column to sample non-interacting pairs." optional="True" /> <conditional name="experiment"> <param name="type" type="select" label="Experimental Type" display="radio" help="Choose a specific experimental system type."> <option value="">Any</option> @@ -58,10 +59,6 @@ <test> <param format="tabular" name="input" value="roc/human_hv1h2.txt" /> <param format="tabular" name="database" value="roc/biogrid_fret.txt" /> - <conditional name="experiment"> - <param name="type" value="physical" /> - <param name="method" value="Two-hybrid" /> - </conditional> <output name="output" file="roc/human_hv1h2.png" /> </test> </tests>