Repository 'saint_interactions'
hg clone https://toolshed.g2.bx.psu.edu/repos/bornea/saint_interactions

Changeset 11:ecbbc746d5a7 (2016-01-12)
Previous changeset 10:00fbbb20ffbe (2015-11-19) Next changeset 12:f8ef6b24862b (2016-01-29)
Commit message:
Deleted selected files
removed:
ConsensusPathDB_human_PPI.txt
ProteinInteractions_v2.py
ProteinInteractions_v2.xml
tool_dependencies.xml
b
diff -r 00fbbb20ffbe -r ecbbc746d5a7 ConsensusPathDB_human_PPI.txt
--- a/ConsensusPathDB_human_PPI.txt Thu Nov 19 11:45:52 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,184014 +0,0 @@\n-#  ConsensusPathDB (version 29) list of human protein interactions\r\n-#  source_databases\tinteraction_publications\tinteraction_participants\tinteraction_confidence\r\n-PhosphoPOINT,DIP,HPRD,Biogrid\t22307056,23602568,7687743,17081983,12840032,862676,93330262,7867743,1322499,17192257,16464493,8626767,15466476,19847302,11971971\tMK03_HUMAN\tNA\r\n-HPRD,BIND\t12834348,8267636,10779411,9733480,8355279,15952226,7045378,12437104,8663221,8939944,10966741,12039587,11342132,10346927,12593649\tFA10_HUMAN\tNA\r\n-BIND,HPRD\t9003757\tPROC_HUMAN\tNA\r\n-HPRD\t9493264,15100216\tDHYS_HUMAN\tNA\r\n-HPRD,Biogrid\t9521684\tARSA_HUMAN\tNA\r\n-HPRD\t1547496,2805069,7801123,20034391,17318177\tTAF1B_HUMAN,TAF1C_HUMAN,TBP_HUMAN,TAF1A_HUMAN,TAF1D_HUMAN\tNA\r\n-NetPath,HPRD,CORUM,Spike\t8692842,9852112\tCCNH_HUMAN,TF2H4_HUMAN,CDK7_HUMAN,ERCC2_HUMAN,TF2H2_HUMAN,TF2H1_HUMAN,ERCC3_HUMAN,MAT1_HUMAN,TF2H3_HUMAN\tNA\r\n-Biogrid\t22939629\tECHB_HUMAN,ECHA_HUMAN\t1\r\n-HPRD,PINdb,CORUM,Spike,IntAct,MINT,Biogrid\t22939629,9677423,17643375,1956398,9836642,11113176,16547462,1956403,12665589,7926747\tT2EA_HUMAN,T2EB_HUMAN\t0.99982300000000002\r\n-Biogrid,Spike,BIND,PINdb,CORUM,IntAct,HPRD\t12972251,8224848,17643375,7958900,8626665,7724559\tT2AG_HUMAN,TF2AA_HUMAN\t1\r\n-CORUM\t16791850\tSCRIB_HUMAN,VANG2_HUMAN\t0.97206899999999996\r\n-IntAct,HPRD\t19379695\tDVL2_HUMAN,PAR6A_HUMAN\t0.99919100000000005\r\n-PhosphoPOINT,IntAct,DIP,HPRD\t16172120,21884935,20668654,9452388,12408851,16126727\tKCC2A_HUMAN\tNA\r\n-IntAct\t20826808\tKGP1_HUMAN\tNA\r\n-Biogrid,HPRD,PhosphoPOINT,IntAct\t17513757,20194509,12901858,19345186,23602568,9448268\tNLK_HUMAN\tNA\r\n-BIND,Manual 
upload,Spike,IntAct,HPRD,Biogrid,InnateDB,MINT,NetPath\t8596638,21706016,11960376,11314042,11027663,22536782,10913131,20936779,9242692,16099987,19523899,9804835,19380743,10871282,20005866,15010462\tGRB2_HUMAN,GAB1_HUMAN\t0.99998600000000004\r\n-NetPath,Biogrid,MINT,PDB,IntAct,BIND,Spike,PhosphoPOINT,HPRD,DIP\t18271526,10788520,23597562,1856216,8413296,12093292,11684674,9725626,19336395,10970856,17145710,3494473,21439278,8639530,15837620,16274239,12297050,18427122,2790960,15950906,17697999,15620700,9029149,8420971,21822280,20837704,10085134,8663535,10497195,12620237,19531499\tEGF_HUMAN,EGFR_HUMAN\t0.99983500000000003\r\n-Biogrid,HPRD,PhosphoPOINT,Spike\t12837288,11337490,15572663,10823820,16094384,16254239,15817476,7537852,10085298,12408869,15107421,7615549,16713569,7534286,12853963,9658172,15592455,11695992,11774284,9230116\tSRC_HUMAN,PAXI_HUMAN\t0.99928099999999997\r\n-IntAct\t7679113\tPDGFB_HUMAN,PDGFA_HUMAN\t0.99999700000000002\r\n-Biogrid,HPRD,PhosphoPOINT\t7535778,11046132,2161888,8617789,7523122,2542288,11818394,8943348,16497976,10837138,9546424\tPGFRA_HUMAN\tNA\r\n-Biogrid,Spike,PhosphoPOINT,IntAct,HPRD\t8617789,10391677,16189514,98055834,12062403,10821867,15271984,7523122,2542288,7935391,8195171,8940081,2550144,11331882,1847074,2161888\tPGFRB_HUMAN,PGFRA_HUMAN\t0.99990000000000001\r\n-Biogrid,BIND,PhosphoPOINT,HPRD,DIP\t10821867,10391677,11046132,18505839,9207175,8195171,2550144,98055834,15889147,8617789,7935391,15271984,8940081,12062403,7523122\tPGFRB_HUMAN\tNA\r\n-HPRD,BIND,Biogrid\t11279502\tCD80_HUMAN\tNA\r\n-InnateDB,Spike,CORUM,IntAct\t17043309,21376236,18614546\tMTOR_HUMAN,RICTR_HUMAN,SIN1_HUMAN,LST8_HUMAN\tNA\r\n-HPRD,BIND,Spike\t11278590,1522902,8479519,9240923,16189514\tCOL_HUMAN,LIPP_HUMAN\t0.99988999999999995\r\n-BIND,IntAct,HPRD,DIP,Biogrid,MINT\t1610922,9789022,19664600,12403615,11560492,10742177,21777382,15689188,15469931,7966314,15210129,5146194,12820260,11243784,9818054,19861125,8428915,23850452,9154916,15981995,10052934,21740906,15299680,23
792159,15735344,15299640,14711308,10986125,16185074,21422279\tTTHY_HUMAN\tNA\r\n-Spike,HPRD,DIP,Biogrid\t10360181,8636150,19168127,9789084,12782285\tGBG1_HUMAN,GBB1_HUMAN\t0.999996\r\n-BIND,Spike,CORUM,IntAct,PDB,MINT,HPRD\t12825937,15170324,17208314,16189514,12036349,19246009,15248757,15451670,11687658,9065406\tFNTA_HUMAN,FNTB_HUMAN\t1\r\n-HPRD,Biogrid\t1917988\tOPSD_HUMAN,ARRS_HUMAN\t0.33184000000000002\r\n-IntAct,Spike,HPRD,MINT,Biogrid\t17224163,16501559,21988832,161690'..b'9599999999998\r\n-Manual upload\t16099987,10799540\tSEC63_HUMAN,S61A1_HUMAN\t0.99981799999999998\r\n-Manual upload\t16099987,10725395\tSNTA1_HUMAN,ERBB2_HUMAN\t0.81840900000000005\r\n-Manual upload\t16099987,11160423\tCND2_HUMAN,ACTN3_HUMAN\t0.83902299999999996\r\n-Manual upload\t16099987,12574403\tRASN_HUMAN,RIN1_HUMAN\t0.97078799999999998\r\n-Manual upload\t16099987,11050113\t41_HUMAN,GRIA1_HUMAN\t0.78937100000000004\r\n-Manual upload\t16099987,8183343\tNMDE1_HUMAN,2A5G_HUMAN\t0.97144299999999995\r\n-Manual upload\t16099987,11313948\tIRS1_HUMAN,RET1_HUMAN\t0.94985200000000003\r\n-Manual upload\t16099987,8072547\tFOS_HUMAN,FRK_HUMAN\t0.93157400000000001\r\n-Manual upload\t16099987,14532002\tKAPCA_HUMAN,RIMS1_HUMAN\t0.95987299999999998\r\n-Manual upload\t9689133,16099987\tGSK3A_HUMAN,PSN1_HUMAN\t0.43898199999999998\r\n-Manual upload\t16099987,11978810\tSTX3_HUMAN,KCC2A_HUMAN\t0.99540899999999999\r\n-Manual upload\t11093160,16099987\tCOF1_HUMAN,2A5G_HUMAN\t0.98403099999999999\r\n-Manual upload\t16099987,12361576\t1433F_HUMAN,COF1_HUMAN\t0.56582699999999997\r\n-Manual upload\t16099987,15082510\tGNAO_HUMAN,OPRX_HUMAN\t0.99776299999999996\r\n-Manual upload\t16099987,11381094\tSTON2_HUMAN,AP2S1_HUMAN\t0.999919\r\n-Manual upload\t16099987,8384211\tITPR1_HUMAN,ANK1_HUMAN\t0.99430499999999999\r\n-Manual upload\t16099987,11516400\tRAB3A_HUMAN,GRAB_HUMAN\t0.38458599999999998\r\n-Manual upload\t10037467,16099987\tNMDE1_HUMAN,TBA1B_HUMAN\t0.28129399999999999\r\n-Manual 
upload\t16099987,11516400\tGRAB_HUMAN,IP6K2_HUMAN\t0.45438600000000001\r\n-Manual upload\t16099987,8810315\tARHG1_HUMAN,RHG01_HUMAN\t0.986649\r\n-Manual upload\t16099987,7624324\tRAF1_HUMAN,M4K1_HUMAN\t0.39155200000000001\r\n-Manual upload\t9601641,16099987\tGSK3A_HUMAN,APC1_HUMAN\t0.99829900000000005\r\n-Manual upload\t16099987,1645526\tBKRB1_HUMAN,GBB2_HUMAN\t0.96690299999999996\r\n-Manual upload\t16099987,14976183\tACM1_HUMAN,RGS2_HUMAN\t0.97882400000000003\r\n-Manual upload\t7678277,16099987\tTBA1B_HUMAN,LYN_HUMAN\t0.869919\r\n-Manual upload\t10488094,16099987\tGRM7_HUMAN,GNAO_HUMAN\t0.97910600000000003\r\n-Manual upload\t16099987,11018051\tACTB_HUMAN,SRC8_HUMAN\t0.99978299999999998\r\n-Manual upload\t15070900,16099987\tACTN3_HUMAN,SH3K1_HUMAN\t0.99239299999999997\r\n-Manual upload\t16099987,15082510\tOPRD_HUMAN,GBB2_HUMAN\t0.96181099999999997\r\n-Manual upload\t1962211,16099987\tADCY2_HUMAN,GBB2_HUMAN\t0.986711\r\n-Manual upload\t11555640,16099987\tPKR1_HUMAN,CASP3_HUMAN\t0.925867\r\n-Manual upload\t16099987,7916637\tGBB2_HUMAN,ACM4_HUMAN\t0.89375199999999999\r\n-Manual upload\t10514522,16099987\tCAC1C_HUMAN,MTAP2_HUMAN\t0.94425899999999996\r\n-Manual upload\t16099987,8611507\tKAPCA_HUMAN,PPR1A_HUMAN\t0.27306900000000001\r\n-Manual upload\t16099987,15082510\tGNAO_HUMAN,CNR1_HUMAN\t0.999865\r\n-Manual upload\t10531066,16099987\tMEF2B_HUMAN,MK14_HUMAN\t0.19953899999999999\r\n-Manual upload\t16099987,186030\tMY18A_HUMAN,MYPT1_HUMAN\t0.99321199999999998\r\n-Manual upload\t16099987,14976183\tRGS2_HUMAN,GNA11_HUMAN\t0.99947600000000003\r\n-Manual upload\t16099987,7828890\tRASN_HUMAN,RGRF1_HUMAN\t0.99891399999999997\r\n-Manual upload\t16099987,12809504\tKPCA_HUMAN,SRF_HUMAN\t0.99548700000000001\r\n-Manual upload\t16099987,15082510\tGNAI3_HUMAN,OPRD_HUMAN\t0.99975400000000003\r\n-Manual upload\t16099987,7809086\tRASN_HUMAN,GNDS_HUMAN\t0.92483499999999996\r\n-Manual upload\t16099987,2116237\tRASN_HUMAN,NF1_HUMAN\t0.94440800000000003\r\n-Manual 
upload\t16099987,8393539\tCANB1_HUMAN,MTAP2_HUMAN\t0.45007399999999997\r\n-Manual upload\t12114518,16099987\tKCNA1_HUMAN,STX3_HUMAN\t0.99938800000000005\r\n-Manual upload\t16099987,7553862\tCPLX1_HUMAN,STX3_HUMAN\t0.99945200000000001\r\n-Manual upload\t10611249,16099987\tNFAC1_HUMAN,KAPCA_HUMAN\t0.93391299999999999\r\n-Manual upload\t16099987,10692440\tSTX3_HUMAN,CAC1B_HUMAN\t0.97467899999999996\r\n-Manual upload\t16099987,8868471\tPI42A_HUMAN,RHG01_HUMAN\t0.0098405899999999998\r\n-Manual upload\t16099987,7716514\tFIBA_HUMAN,ITB1_HUMAN\t0.99950099999999997\r\n-Manual upload\t16099987,7615553\tTF2B_HUMAN,CREB1_HUMAN\t0.99965599999999999\r\n-Manual upload\t12820960,16099987\tMTOR_HUMAN,4EBP3_HUMAN\t0.99838000000000005\r\n-Manual upload\t16099987,11433294\tPAR1_HUMAN,DVL1_HUMAN\t0.87840099999999999\r\n'
b
diff -r 00fbbb20ffbe -r ecbbc746d5a7 ProteinInteractions_v2.py
--- a/ProteinInteractions_v2.py Thu Nov 19 11:45:52 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,171 +0,0 @@
-################################################################################
-# This program will read in a SAINT 'list.txt' file and the interactions from
-# the consensus path db database and return all the interactions that we saw in
-# our experiment in a format suitable for cytoscape. This allows us to filter
-# before getting PPIs so that it doesn't affect our SAINT score or include
-# interactions that don't score well
-################################################################################
-import urllib2
-import itertools
-import sys
-import os
-################################################################################
-## REQUIRED INPUT ##
-
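-# 1) listfile: SAINTexpress output (tab-delimited 'list.txt')
-# 2) SAINT_cutoff: Saint score cutoff for import (between 0 and 1)
-# 3) Int_conf: Minimum confidence of a PPI from CPDB to include, given as a
-#    number (the Galaxy wrapper passes a float). Guide values:
-#       - low: 0 (no filtering)
-#       - medium: 0.5
-#       - high: 0.7
-#       - very high: 0.9
-# 4) Species: Human, Yeast, or Mouse
-# 5) cyto_file: Path the finished network file is renamed to
-# 6) db_path: Prefix (ending in a path separator) that is prepended to the
-#    ConsensusPathDB_<species>_PPI.txt file name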
-###############################################################################
-# Positional command-line arguments. All of them arrive as strings and are
-# read at import time, so the module cannot be imported without them.
-listfile = sys.argv[1]  # SAINT list file to read
-SAINT_cutoff = sys.argv[2]  # SaintScore threshold (still a string here)
-Int_conf = sys.argv[3]  # CPDB interaction-confidence threshold (still a string here)
-Species = sys.argv[4]  # selects which ConsensusPathDB table is loaded below
-cyto_file = sys.argv[5]  # final name of the network file written by cytoscape()
-db_path = sys.argv[6]  # prefix prepended to the ConsensusPathDB file name
-###############################################################################
-class ReturnValue1(object):
-    """Bundle of protein identifiers produced by a lookup.
-
-    Attributes:
-        up: the value passed as ``uniprot_acc``.
-        gn: the value passed as ``gene``.
-        sp: the value passed as ``swissprot``.
-    """
-    def __init__(self, uniprot_acc, gene, swissprot):
-        self.up, self.gn, self.sp = uniprot_acc, gene, swissprot
-class ReturnValue2(object):
-    """Bundle of the parsed pieces of a list file.
-
-    Attributes:
-        data: the value passed as ``getdata``.
-        proteins: the value passed as ``getproteins``.
-        header: the value passed as ``getheader``.
-    """
-    def __init__(self, getdata, getproteins, getheader):
-        self.data, self.proteins, self.header = getdata, getproteins, getheader
-def main(listfile, SAINT_cutoff, Int_conf, Species):
-    """Build the data-dependent network and write it out for Cytoscape.
-
-    Species is not used inside this function; the ConsensusPathDB table is
-    chosen by the module-level code from the global Species value.
-    """
-    network = dd_network(listfile, SAINT_cutoff, Int_conf)
-    cytoscape(network, listfile, SAINT_cutoff)
-def readtab(infile):
-    """Read a tab-delimited text file into a list of rows.
-
-    Each line has surrounding whitespace stripped and is then split on tab
-    characters, so a blank line becomes [''].
-    """
-    with open(infile, 'r') as handle:
-        return [raw.strip().split('\t') for raw in handle]
-def read_listfile(listfile):
-    """Split a tab-delimited list file into data rows, protein ids and header.
-
-    The first row is the header and every following row is data. The protein
-    id of each row is taken from the column immediately left of "PreyGene".
-    Returns a ReturnValue2(data, proteins, header).
-    """
-    rows = readtab(listfile)
-    header, data = rows[0], rows[1:]
-    prey_col = header.index("PreyGene") - 1
-    proteins = [row[prey_col] for row in data]
-    return ReturnValue2(data, proteins, header)
-
-def _log_error_protein(accession, reason):
-    """Append one 'accession<TAB>reason' line to 'error proteins.txt'."""
-    with open('error proteins.txt', 'a+') as error:
-        error.write(accession + '\t' + reason + '\n')
-
-def get_info(uniprot_accession_in):
-    """Fetch a protein's FASTA record from UniProt and parse its header.
-
-    The header line is split on '|': field 1 is used as the accession and the
-    first space-delimited token of field 2 as the entry name. The gene name
-    is the token following 'GN=' when that tag is present.
-
-    Returns:
-        ReturnValue1(uniprot_acc, gene, swissprot). Fields that cannot be
-        determined are the string 'NA'.
-
-    A 404 response, an empty FASTA, or a header without the expected
-    '|'-separated fields is recorded in 'error proteins.txt' and yields an
-    all-'NA' result. Any other HTTP error terminates the program through
-    sys.exit.
-    """
-    url = "http://www.uniprot.org/uniprot/" + uniprot_accession_in + ".fasta"
-    try:
-        response = urllib2.urlopen(url)
-    except urllib2.HTTPError as err:
-        if err.code == 404:
-            _log_error_protein(uniprot_accession_in, "Invalid URL. Check protein")
-            # ReturnValue1 takes three fields; the old two-argument call
-            # raised TypeError instead of returning the placeholder.
-            return ReturnValue1('NA', 'NA', 'NA')
-        if err.code == 302:
-            sys.exit("Request timed out. Check connection and try again.")
-        sys.exit("Uniprot had some other error")
-    try:
-        lines = response.readlines()
-    finally:
-        response.close()
-
-    # Check for an empty body before touching lines[0].
-    if not lines:
-        _log_error_protein(uniprot_accession_in, "Blank Fasta")
-        return ReturnValue1('NA', 'NA', 'NA')
-
-    header = lines[0].rstrip()
-    fields = header.split('|')
-    if len(fields) < 3:
-        _log_error_protein(uniprot_accession_in, "Unexpected Fasta header")
-        return ReturnValue1('NA', 'NA', 'NA')
-    uniprot_acc = fields[1]
-    swissprot = fields[2].split(' ')[0]
-
-    if 'GN=' in header:
-        genename = header.split('GN=')[1].split(' ')[0]
-    else:
-        genename = 'NA'
-    return ReturnValue1(uniprot_acc, genename, swissprot)
-
-def _meets_confidence(raw_confidence, cutoff):
-    """Return True when a CPDB confidence value passes the numeric cutoff.
-
-    Non-numeric values such as 'NA' are kept: the previous string comparison
-    always let them through, and that behaviour is preserved here.
-    """
-    try:
-        return float(raw_confidence) >= cutoff
-    except ValueError:
-        return True
-
-def dd_network(listfile, SAINTscore, CPDB_filter):
-    """Build the data-dependent interaction network for one list file.
-
-    Rows of the list file whose "SaintScore" is at least SAINTscore are kept,
-    and column 1 of each kept row is resolved through get_info() to the
-    identifier used in the CPDB table. CPDB rows (after the two header lines)
-    that pass CPDB_filter are then scanned for participants that are among
-    those identifiers.
-
-    Args:
-        listfile: path of the tab-delimited list file.
-        SAINTscore: score cutoff, a number or numeric string.
-        CPDB_filter: confidence cutoff, a number or numeric string.
-
-    Returns:
-        dict mapping each resolved identifier to the list of other resolved
-        identifiers it shares a CPDB interaction with, in first-seen order.
-
-    Raises:
-        ValueError: if a cutoff or a SaintScore cell is not numeric.
-    """
-    listing = read_listfile(listfile)  # parse the file once
-    score_col = listing.header.index("SaintScore")
-    # Compare numerically; as strings "1e-05" would sort above "0.8".
-    saint_cutoff = float(SAINTscore)
-    conf_cutoff = float(CPDB_filter)
-
-    accessions = []
-    for row in listing.data:
-        if len(row) <= max(score_col, 1):
-            continue  # blank or truncated line
-        if float(row[score_col]) >= saint_cutoff:
-            accessions.append(get_info(row[1]).sp)
-
-    observed = set(accessions)
-    network = dict((acc, []) for acc in accessions)
-    for entry in CPDB[2:]:
-        if len(entry) < 4 or not _meets_confidence(entry[3], conf_cutoff):
-            continue
-        # Only participants seen in the experiment can appear in the result.
-        present = [p for p in entry[2].split(',') if p in observed]
-        for source in present:
-            partners = network[source]
-            for target in present:
-                if target != source and target not in partners:
-                    partners.append(target)
-    return network
-def cytoscape(dd_network, listfile, SAINTscore):
-    """Write the network to 'network.sif' as tab-separated 'pp' edges.
-
-    Args:
-        dd_network: dict mapping a protein id to a list of partner ids.
-        listfile: path of the tab-delimited list file.
-        SAINTscore: score cutoff, a number or numeric string; rows whose
-            "SaintScore" is below it are skipped.
-
-    The first line written is the header "Prey<TAB>Interactions"; each later
-    line is "<column 1 of the row><TAB>pp<TAB><partner>".
-
-    Raises:
-        ValueError: if the cutoff or a SaintScore cell is not numeric.
-    """
-    listing = read_listfile(listfile)  # parse the file once
-    score_col = listing.header.index("SaintScore")
-    # Compare numerically; as strings "1e-05" would sort above "0.8".
-    cutoff = float(SAINTscore)
-    with open('network.sif', 'wt') as out:
-        out.write('\t'.join(["Prey", "Interactions"]) + '\n')
-        for row in listing.data:
-            if len(row) <= max(score_col, 1):
-                continue  # blank or truncated line
-            if float(row[score_col]) < cutoff:
-                continue
-            prey = row[1]
-            # NOTE(review): the network built by dd_network() is keyed by the
-            # identifier returned from the UniProt lookup, which may differ
-            # from this column; a missing key is treated as "no partners"
-            # rather than raising KeyError. Confirm the intended key.
-            for partner in dd_network.get(prey, []):
-                out.write(prey + '\t' + 'pp' + '\t' + partner + '\n')
-# ConsensusPathDB interaction table for each supported species.
-_CPDB_FILES = {
-    "Human": 'ConsensusPathDB_human_PPI.txt',
-    "Yeast": 'ConsensusPathDB_yeast_PPI.txt',
-    "Mouse": 'ConsensusPathDB_mouse_PPI.txt',
-}
-if Species not in _CPDB_FILES:
-    # Fail now with a clear message instead of a NameError on CPDB later.
-    sys.exit("Unsupported species '" + str(Species) + "'. Use Human, Yeast, or Mouse.")
-CPDB = readtab(str(db_path) + _CPDB_FILES[Species])
-if __name__ == '__main__':
-    import shutil
-    main(listfile, SAINT_cutoff, Int_conf, Species)
-    # shutil.move falls back to copy-and-delete when the destination is on a
-    # different filesystem, where os.rename would raise OSError.
-    shutil.move('network.sif', str(cyto_file))
b
diff -r 00fbbb20ffbe -r ecbbc746d5a7 ProteinInteractions_v2.xml
--- a/ProteinInteractions_v2.xml Thu Nov 19 11:45:52 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,36 +0,0 @@
-<tool id="SAINT_ProteinInteractions_v2" name="SAINT Output to Protein Interaction File">
-  <description></description>
-  <!-- Positional arguments, in order: SAINT list file, SAINT score cutoff,
-       CPDB confidence cutoff, species, output file path, and the directory
-       prefix that holds the ConsensusPathDB tables. -->
-  <command interpreter="python">ProteinInteractions_v2.py $input $saint_cutoff $CPDB_cutoff $species $Cytoscape_File \$INSTALL_RUN_PATH/</command>
-  <requirements>
-    <requirement type="set_environment">INSTALL_RUN_PATH</requirement>
-  </requirements>
-  <inputs>
-    <param format="txt" name="input" type="data" label="SAINT Output"/>
-    <param type="float" name="saint_cutoff" label="Saint Score Cutoff" value="0.8"/>
-    <param type="float" name="CPDB_cutoff" label="Consensus Path Database Score Cutoff" value="0.8"/>
-    <param type="select" name="species" label="Species">
-      <option value="Human">Human</option>
-      <option value="Yeast">Yeast</option>
-      <option value="Mouse">Mouse</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="sif" name="Cytoscape_File" label="Cytoscape File"/>    
-  </outputs>
-  <!-- Text matching "error" on stdout is reported at level "fatal". -->
-  <stdio>
-    <regex match="error"
-    source="stdout"
-           level="fatal"
-           description="Unknown error"/>
-  </stdio> 
-
-  <!-- NOTE(review): this test names a FASTA input and an output called
-       "out_file1", while the only output declared above is "Cytoscape_File".
-       It looks like a leftover template. TODO confirm and replace with real
-       test data. -->
-  <tests>
-    <test>
-      <param name="input" value="fa_gc_content_input.fa"/>
-      <output name="out_file1" file="fa_gc_content_output.txt"/>
-    </test>
-  </tests>
-  <help>
-    This program will read in a SAINT 'list.txt' file and the interactions from the consensus path db database and return all the interactions that we saw in our experiment in a format suitable for cytoscape. This allows us to filter before getting PPIs so that it doesn't affect our SAINT score or include interactions that don't score well
-  </help>
-</tool>
\ No newline at end of file
b
diff -r 00fbbb20ffbe -r ecbbc746d5a7 tool_dependencies.xml
--- a/tool_dependencies.xml Thu Nov 19 11:45:52 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<!-- Exposes the repository install directory to the tool as INSTALL_RUN_PATH. -->
-<tool_dependency>
-    <set_environment version="1.0">
-        <environment_variable name="INSTALL_RUN_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
-    </set_environment>
-</tool_dependency>
\ No newline at end of file