Previous changeset 10:00fbbb20ffbe (2015-11-19) Next changeset 12:f8ef6b24862b (2016-01-29) |
Commit message:
Deleted selected files |
removed:
ConsensusPathDB_human_PPI.txt ProteinInteractions_v2.py ProteinInteractions_v2.xml tool_dependencies.xml |
b |
diff -r 00fbbb20ffbe -r ecbbc746d5a7 ConsensusPathDB_human_PPI.txt --- a/ConsensusPathDB_human_PPI.txt Thu Nov 19 11:45:52 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,184014 +0,0 @@\n-# ConsensusPathDB (version 29) list of human protein interactions\r\n-# source_databases\tinteraction_publications\tinteraction_participants\tinteraction_confidence\r\n-PhosphoPOINT,DIP,HPRD,Biogrid\t22307056,23602568,7687743,17081983,12840032,862676,93330262,7867743,1322499,17192257,16464493,8626767,15466476,19847302,11971971\tMK03_HUMAN\tNA\r\n-HPRD,BIND\t12834348,8267636,10779411,9733480,8355279,15952226,7045378,12437104,8663221,8939944,10966741,12039587,11342132,10346927,12593649\tFA10_HUMAN\tNA\r\n-BIND,HPRD\t9003757\tPROC_HUMAN\tNA\r\n-HPRD\t9493264,15100216\tDHYS_HUMAN\tNA\r\n-HPRD,Biogrid\t9521684\tARSA_HUMAN\tNA\r\n-HPRD\t1547496,2805069,7801123,20034391,17318177\tTAF1B_HUMAN,TAF1C_HUMAN,TBP_HUMAN,TAF1A_HUMAN,TAF1D_HUMAN\tNA\r\n-NetPath,HPRD,CORUM,Spike\t8692842,9852112\tCCNH_HUMAN,TF2H4_HUMAN,CDK7_HUMAN,ERCC2_HUMAN,TF2H2_HUMAN,TF2H1_HUMAN,ERCC3_HUMAN,MAT1_HUMAN,TF2H3_HUMAN\tNA\r\n-Biogrid\t22939629\tECHB_HUMAN,ECHA_HUMAN\t1\r\n-HPRD,PINdb,CORUM,Spike,IntAct,MINT,Biogrid\t22939629,9677423,17643375,1956398,9836642,11113176,16547462,1956403,12665589,7926747\tT2EA_HUMAN,T2EB_HUMAN\t0.99982300000000002\r\n-Biogrid,Spike,BIND,PINdb,CORUM,IntAct,HPRD\t12972251,8224848,17643375,7958900,8626665,7724559\tT2AG_HUMAN,TF2AA_HUMAN\t1\r\n-CORUM\t16791850\tSCRIB_HUMAN,VANG2_HUMAN\t0.97206899999999996\r\n-IntAct,HPRD\t19379695\tDVL2_HUMAN,PAR6A_HUMAN\t0.99919100000000005\r\n-PhosphoPOINT,IntAct,DIP,HPRD\t16172120,21884935,20668654,9452388,12408851,16126727\tKCC2A_HUMAN\tNA\r\n-IntAct\t20826808\tKGP1_HUMAN\tNA\r\n-Biogrid,HPRD,PhosphoPOINT,IntAct\t17513757,20194509,12901858,19345186,23602568,9448268\tNLK_HUMAN\tNA\r\n-BIND,Manual 
upload,Spike,IntAct,HPRD,Biogrid,InnateDB,MINT,NetPath\t8596638,21706016,11960376,11314042,11027663,22536782,10913131,20936779,9242692,16099987,19523899,9804835,19380743,10871282,20005866,15010462\tGRB2_HUMAN,GAB1_HUMAN\t0.99998600000000004\r\n-NetPath,Biogrid,MINT,PDB,IntAct,BIND,Spike,PhosphoPOINT,HPRD,DIP\t18271526,10788520,23597562,1856216,8413296,12093292,11684674,9725626,19336395,10970856,17145710,3494473,21439278,8639530,15837620,16274239,12297050,18427122,2790960,15950906,17697999,15620700,9029149,8420971,21822280,20837704,10085134,8663535,10497195,12620237,19531499\tEGF_HUMAN,EGFR_HUMAN\t0.99983500000000003\r\n-Biogrid,HPRD,PhosphoPOINT,Spike\t12837288,11337490,15572663,10823820,16094384,16254239,15817476,7537852,10085298,12408869,15107421,7615549,16713569,7534286,12853963,9658172,15592455,11695992,11774284,9230116\tSRC_HUMAN,PAXI_HUMAN\t0.99928099999999997\r\n-IntAct\t7679113\tPDGFB_HUMAN,PDGFA_HUMAN\t0.99999700000000002\r\n-Biogrid,HPRD,PhosphoPOINT\t7535778,11046132,2161888,8617789,7523122,2542288,11818394,8943348,16497976,10837138,9546424\tPGFRA_HUMAN\tNA\r\n-Biogrid,Spike,PhosphoPOINT,IntAct,HPRD\t8617789,10391677,16189514,98055834,12062403,10821867,15271984,7523122,2542288,7935391,8195171,8940081,2550144,11331882,1847074,2161888\tPGFRB_HUMAN,PGFRA_HUMAN\t0.99990000000000001\r\n-Biogrid,BIND,PhosphoPOINT,HPRD,DIP\t10821867,10391677,11046132,18505839,9207175,8195171,2550144,98055834,15889147,8617789,7935391,15271984,8940081,12062403,7523122\tPGFRB_HUMAN\tNA\r\n-HPRD,BIND,Biogrid\t11279502\tCD80_HUMAN\tNA\r\n-InnateDB,Spike,CORUM,IntAct\t17043309,21376236,18614546\tMTOR_HUMAN,RICTR_HUMAN,SIN1_HUMAN,LST8_HUMAN\tNA\r\n-HPRD,BIND,Spike\t11278590,1522902,8479519,9240923,16189514\tCOL_HUMAN,LIPP_HUMAN\t0.99988999999999995\r\n-BIND,IntAct,HPRD,DIP,Biogrid,MINT\t1610922,9789022,19664600,12403615,11560492,10742177,21777382,15689188,15469931,7966314,15210129,5146194,12820260,11243784,9818054,19861125,8428915,23850452,9154916,15981995,10052934,21740906,15299680,23
792159,15735344,15299640,14711308,10986125,16185074,21422279\tTTHY_HUMAN\tNA\r\n-Spike,HPRD,DIP,Biogrid\t10360181,8636150,19168127,9789084,12782285\tGBG1_HUMAN,GBB1_HUMAN\t0.999996\r\n-BIND,Spike,CORUM,IntAct,PDB,MINT,HPRD\t12825937,15170324,17208314,16189514,12036349,19246009,15248757,15451670,11687658,9065406\tFNTA_HUMAN,FNTB_HUMAN\t1\r\n-HPRD,Biogrid\t1917988\tOPSD_HUMAN,ARRS_HUMAN\t0.33184000000000002\r\n-IntAct,Spike,HPRD,MINT,Biogrid\t17224163,16501559,21988832,161690'..b'9599999999998\r\n-Manual upload\t16099987,10799540\tSEC63_HUMAN,S61A1_HUMAN\t0.99981799999999998\r\n-Manual upload\t16099987,10725395\tSNTA1_HUMAN,ERBB2_HUMAN\t0.81840900000000005\r\n-Manual upload\t16099987,11160423\tCND2_HUMAN,ACTN3_HUMAN\t0.83902299999999996\r\n-Manual upload\t16099987,12574403\tRASN_HUMAN,RIN1_HUMAN\t0.97078799999999998\r\n-Manual upload\t16099987,11050113\t41_HUMAN,GRIA1_HUMAN\t0.78937100000000004\r\n-Manual upload\t16099987,8183343\tNMDE1_HUMAN,2A5G_HUMAN\t0.97144299999999995\r\n-Manual upload\t16099987,11313948\tIRS1_HUMAN,RET1_HUMAN\t0.94985200000000003\r\n-Manual upload\t16099987,8072547\tFOS_HUMAN,FRK_HUMAN\t0.93157400000000001\r\n-Manual upload\t16099987,14532002\tKAPCA_HUMAN,RIMS1_HUMAN\t0.95987299999999998\r\n-Manual upload\t9689133,16099987\tGSK3A_HUMAN,PSN1_HUMAN\t0.43898199999999998\r\n-Manual upload\t16099987,11978810\tSTX3_HUMAN,KCC2A_HUMAN\t0.99540899999999999\r\n-Manual upload\t11093160,16099987\tCOF1_HUMAN,2A5G_HUMAN\t0.98403099999999999\r\n-Manual upload\t16099987,12361576\t1433F_HUMAN,COF1_HUMAN\t0.56582699999999997\r\n-Manual upload\t16099987,15082510\tGNAO_HUMAN,OPRX_HUMAN\t0.99776299999999996\r\n-Manual upload\t16099987,11381094\tSTON2_HUMAN,AP2S1_HUMAN\t0.999919\r\n-Manual upload\t16099987,8384211\tITPR1_HUMAN,ANK1_HUMAN\t0.99430499999999999\r\n-Manual upload\t16099987,11516400\tRAB3A_HUMAN,GRAB_HUMAN\t0.38458599999999998\r\n-Manual upload\t10037467,16099987\tNMDE1_HUMAN,TBA1B_HUMAN\t0.28129399999999999\r\n-Manual 
upload\t16099987,11516400\tGRAB_HUMAN,IP6K2_HUMAN\t0.45438600000000001\r\n-Manual upload\t16099987,8810315\tARHG1_HUMAN,RHG01_HUMAN\t0.986649\r\n-Manual upload\t16099987,7624324\tRAF1_HUMAN,M4K1_HUMAN\t0.39155200000000001\r\n-Manual upload\t9601641,16099987\tGSK3A_HUMAN,APC1_HUMAN\t0.99829900000000005\r\n-Manual upload\t16099987,1645526\tBKRB1_HUMAN,GBB2_HUMAN\t0.96690299999999996\r\n-Manual upload\t16099987,14976183\tACM1_HUMAN,RGS2_HUMAN\t0.97882400000000003\r\n-Manual upload\t7678277,16099987\tTBA1B_HUMAN,LYN_HUMAN\t0.869919\r\n-Manual upload\t10488094,16099987\tGRM7_HUMAN,GNAO_HUMAN\t0.97910600000000003\r\n-Manual upload\t16099987,11018051\tACTB_HUMAN,SRC8_HUMAN\t0.99978299999999998\r\n-Manual upload\t15070900,16099987\tACTN3_HUMAN,SH3K1_HUMAN\t0.99239299999999997\r\n-Manual upload\t16099987,15082510\tOPRD_HUMAN,GBB2_HUMAN\t0.96181099999999997\r\n-Manual upload\t1962211,16099987\tADCY2_HUMAN,GBB2_HUMAN\t0.986711\r\n-Manual upload\t11555640,16099987\tPKR1_HUMAN,CASP3_HUMAN\t0.925867\r\n-Manual upload\t16099987,7916637\tGBB2_HUMAN,ACM4_HUMAN\t0.89375199999999999\r\n-Manual upload\t10514522,16099987\tCAC1C_HUMAN,MTAP2_HUMAN\t0.94425899999999996\r\n-Manual upload\t16099987,8611507\tKAPCA_HUMAN,PPR1A_HUMAN\t0.27306900000000001\r\n-Manual upload\t16099987,15082510\tGNAO_HUMAN,CNR1_HUMAN\t0.999865\r\n-Manual upload\t10531066,16099987\tMEF2B_HUMAN,MK14_HUMAN\t0.19953899999999999\r\n-Manual upload\t16099987,186030\tMY18A_HUMAN,MYPT1_HUMAN\t0.99321199999999998\r\n-Manual upload\t16099987,14976183\tRGS2_HUMAN,GNA11_HUMAN\t0.99947600000000003\r\n-Manual upload\t16099987,7828890\tRASN_HUMAN,RGRF1_HUMAN\t0.99891399999999997\r\n-Manual upload\t16099987,12809504\tKPCA_HUMAN,SRF_HUMAN\t0.99548700000000001\r\n-Manual upload\t16099987,15082510\tGNAI3_HUMAN,OPRD_HUMAN\t0.99975400000000003\r\n-Manual upload\t16099987,7809086\tRASN_HUMAN,GNDS_HUMAN\t0.92483499999999996\r\n-Manual upload\t16099987,2116237\tRASN_HUMAN,NF1_HUMAN\t0.94440800000000003\r\n-Manual 
upload\t16099987,8393539\tCANB1_HUMAN,MTAP2_HUMAN\t0.45007399999999997\r\n-Manual upload\t12114518,16099987\tKCNA1_HUMAN,STX3_HUMAN\t0.99938800000000005\r\n-Manual upload\t16099987,7553862\tCPLX1_HUMAN,STX3_HUMAN\t0.99945200000000001\r\n-Manual upload\t10611249,16099987\tNFAC1_HUMAN,KAPCA_HUMAN\t0.93391299999999999\r\n-Manual upload\t16099987,10692440\tSTX3_HUMAN,CAC1B_HUMAN\t0.97467899999999996\r\n-Manual upload\t16099987,8868471\tPI42A_HUMAN,RHG01_HUMAN\t0.0098405899999999998\r\n-Manual upload\t16099987,7716514\tFIBA_HUMAN,ITB1_HUMAN\t0.99950099999999997\r\n-Manual upload\t16099987,7615553\tTF2B_HUMAN,CREB1_HUMAN\t0.99965599999999999\r\n-Manual upload\t12820960,16099987\tMTOR_HUMAN,4EBP3_HUMAN\t0.99838000000000005\r\n-Manual upload\t16099987,11433294\tPAR1_HUMAN,DVL1_HUMAN\t0.87840099999999999\r\n' |
b |
diff -r 00fbbb20ffbe -r ecbbc746d5a7 ProteinInteractions_v2.py --- a/ProteinInteractions_v2.py Thu Nov 19 11:45:52 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
################################################################################
# This program reads a SAINT 'list.txt' file and the protein-protein
# interactions (PPIs) from a ConsensusPathDB (CPDB) dump, and writes every known
# interaction between preys seen in the experiment in a format suitable for
# Cytoscape. SAINT-score filtering happens before PPIs are looked up, so
# low-scoring preys never contribute interactions.
################################################################################
import itertools
import os
import shutil
import sys

try:                                   # Python 2
    import urllib2
except ImportError:                    # Python 3 exposes urlopen/HTTPError here
    import urllib.request as urllib2
################################################################################
## REQUIRED INPUT (positional command-line arguments) ##

# 1) listfile:     SAINTexpress output (tab-delimited, with a header row)
# 2) SAINT_cutoff: SAINT score cutoff for import (number between 0 and 1)
# 3) Int_conf:     minimum CPDB interaction confidence (number between 0 and 1;
#                  0 disables filtering)
# 4) Species:      Human, Yeast, or Mouse
# 5) cyto_file:    path the finished network file is moved to
# 6) db_path:      directory holding the ConsensusPathDB_<species>_PPI.txt files
################################################################################

UNIPROT_FASTA_URL = "http://www.uniprot.org/uniprot/%s.fasta"
ERROR_LOG = 'error proteins.txt'
PREY_COL = 1                           # column of the list file holding the prey id
CPDB_FILES = {
    "Human": 'ConsensusPathDB_human_PPI.txt',
    "Yeast": 'ConsensusPathDB_yeast_PPI.txt',
    "Mouse": 'ConsensusPathDB_mouse_PPI.txt',
}
# Rows of the CPDB dump as produced by readtab(); filled in when run as a script.
CPDB = []


class ReturnValue1(object):
    """UniProt lookup result: accession (.up), gene name (.gn), entry name (.sp)."""

    def __init__(self, uniprot_acc, gene, swissprot):
        self.up = uniprot_acc
        self.gn = gene
        self.sp = swissprot


class ReturnValue2(object):
    """Parsed list file: data rows (.data), prey ids (.proteins), header (.header)."""

    def __init__(self, getdata, getproteins, getheader):
        self.data = getdata
        self.proteins = getproteins
        self.header = getheader


def main(listfile, SAINT_cutoff, Int_conf, Species):
    """Build the data-dependent network and write it to 'network.sif'.

    Species is accepted for interface compatibility; the CPDB rows are taken
    from the module-level CPDB list, which must be loaded beforehand.
    """
    cytoscape(dd_network(listfile, SAINT_cutoff, Int_conf), listfile, SAINT_cutoff)


def readtab(infile):
    """Read a tab-delimited text file into a list of rows (lists of strings).

    Only the line terminator is removed, so empty leading or trailing fields
    keep their position instead of shifting the remaining columns.
    """
    with open(infile, 'r') as handle:
        return [line.rstrip('\r\n').split('\t') for line in handle]


def read_listfile(listfile):
    """Return the data rows, prey ids and header of a SAINT list file.

    Blank lines and rows too short to hold a prey id (such as a trailing
    'END OF FILE' marker) are dropped from the data.
    """
    rows = readtab(listfile)
    if not rows:
        sys.exit("The SAINT list file is empty: " + str(listfile))
    header = rows[0]
    prot_start = header.index("PreyGene") - 1
    data = [row for row in rows[1:]
            if len(row) > prot_start and any(field.strip() for field in row)]
    proteins = [row[prot_start] for row in data]
    return ReturnValue2(data, proteins, header)


def _log_error(identifier, message):
    """Append one 'identifier<TAB>message' line to the error log file."""
    with open(ERROR_LOG, 'a+') as log:
        log.write(identifier + '\t' + message + '\n')


def get_info(uniprot_accession_in):
    """Fetch the UniProt FASTA header for an identifier and parse it.

    Returns a ReturnValue1 with the accession, the gene name ('NA' when the
    header has no GN= field) and the Swiss-Prot entry name. When the record
    does not exist (HTTP 404) or the response has no usable header, the
    problem is recorded in the error log and all three fields are 'NA'.
    Any other HTTP failure terminates the program.
    """
    try:
        response = urllib2.urlopen(UNIPROT_FASTA_URL % uniprot_accession_in)
    except urllib2.HTTPError as err:
        if err.code == 404:
            _log_error(uniprot_accession_in, "Invalid URL. Check protein")
            return ReturnValue1('NA', 'NA', 'NA')
        elif err.code == 302:
            sys.exit("Request timed out. Check connection and try again.")
        else:
            sys.exit("Uniprot had some other error")
    try:
        raw = response.read()
    finally:
        response.close()
    # urlopen yields bytes; decode so the header can be split as text.
    text = raw.decode('utf-8', 'replace') if isinstance(raw, bytes) else raw
    lines = text.splitlines()
    if not lines:
        _log_error(uniprot_accession_in, "Blank Fasta")
        return ReturnValue1('NA', 'NA', 'NA')
    header = lines[0]
    # Expected shape: >db|ACCESSION|ENTRY_NAME description ... GN=GENE ...
    parts = header.split('|')
    if len(parts) < 3:
        _log_error(uniprot_accession_in, "Unrecognised Fasta header")
        return ReturnValue1('NA', 'NA', 'NA')
    uniprot_acc = parts[1]
    swissprot = parts[2].split(' ')[0]
    if 'GN=' in header:
        genename = header.split('GN=')[1].split(' ')[0]
    else:
        genename = 'NA'
    return ReturnValue1(uniprot_acc, genename, swissprot)


def _as_score(value):
    """Convert a score field to float, or return None when it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _parse_cutoff(value, label):
    """Convert a cutoff argument to float, exiting with a message if invalid."""
    cutoff = _as_score(value)
    if cutoff is None:
        sys.exit(label + " must be a number, got: " + repr(value))
    return cutoff


def _rows_passing_saint(listfile, SAINTscore):
    """Return the list-file rows whose SaintScore is numerically >= the cutoff."""
    parsed = read_listfile(listfile)
    score_col = parsed.header.index("SaintScore")
    cutoff = _parse_cutoff(SAINTscore, "SAINT score cutoff")
    needed = max(score_col, PREY_COL)
    kept = []
    for row in parsed.data:
        if len(row) <= needed:
            continue
        score = _as_score(row[score_col])
        if score is not None and score >= cutoff:
            kept.append(row)
    return kept


def _build_network(accessions, cpdb_rows, CPDB_filter):
    """Map each accession to the other accessions it interacts with in CPDB.

    A CPDB row is used when its confidence (4th column) is >= the cutoff.
    Rows whose confidence is not numeric ('NA') are kept, which matches what
    the previous string comparison did for them. Partners are listed once,
    in order of first appearance in the CPDB rows, and never include the
    protein itself.
    """
    cutoff = _parse_cutoff(CPDB_filter, "CPDB confidence cutoff")
    wanted = set(accessions)
    network = dict((acc, []) for acc in wanted)
    seen = dict((acc, set()) for acc in wanted)
    for row in cpdb_rows:
        if len(row) < 4:
            continue
        confidence = _as_score(row[3])
        if confidence is not None and not confidence >= cutoff:
            continue
        present = [p for p in row[2].split(',') if p in wanted]
        if len(present) < 2:
            continue
        for source in set(present):
            for partner in present:
                if partner != source and partner not in seen[source]:
                    seen[source].add(partner)
                    network[source].append(partner)
    return network


def dd_network(listfile, SAINTscore, CPDB_filter):
    """Build the data-dependent network for preys passing the SAINT cutoff.

    Each prey id is resolved to its Swiss-Prot entry name through get_info
    (one request per distinct id); ids that cannot be resolved are left out.
    Returns a dict mapping entry name -> list of interacting entry names.
    """
    resolved = {}
    accessions = []
    for row in _rows_passing_saint(listfile, SAINTscore):
        prey = row[PREY_COL]
        if prey not in resolved:
            resolved[prey] = get_info(prey).sp
        if resolved[prey] != 'NA':
            accessions.append(resolved[prey])
    # The first two CPDB rows are the '#' comment/header lines.
    return _build_network(accessions, CPDB[2:], CPDB_filter)


def cytoscape(dd_network, listfile, SAINTscore):
    """Write 'network.sif' with one 'prey<TAB>pp<TAB>partner' line per edge.

    Preys absent from dd_network (for example ids that could not be resolved)
    contribute no lines instead of aborting the run.
    """
    # NOTE(review): dd_network is keyed by the entry names returned by
    # get_info, while the lookup below uses the raw prey column; these only
    # coincide when the list file already holds entry names - confirm.
    filt_data = _rows_passing_saint(listfile, SAINTscore)
    with open('network.sif', 'wt') as out:
        out.write('\t'.join(["Prey", "Interactions"]) + '\n')
        for row in filt_data:
            prey = row[PREY_COL]
            for partner in dd_network.get(prey, []):
                out.write(prey + '\t' + 'pp' + '\t' + partner + '\n')


def load_cpdb(Species, db_path):
    """Read the CPDB dump for Species from db_path; exit on an unknown species."""
    if Species not in CPDB_FILES:
        sys.exit("Unknown species " + repr(Species) + "; expected one of: "
                 + ", ".join(sorted(CPDB_FILES)))
    return readtab(os.path.join(str(db_path), CPDB_FILES[Species]))


if __name__ == '__main__':
    if len(sys.argv) < 7:
        sys.exit("Usage: ProteinInteractions_v2.py listfile SAINT_cutoff "
                 "Int_conf Species cyto_file db_path")
    listfile, SAINT_cutoff, Int_conf, Species, cyto_file, db_path = sys.argv[1:7]
    CPDB = load_cpdb(Species, db_path)
    main(listfile, SAINT_cutoff, Int_conf, Species)
    #main("Crizo_list.txt", 0.7, 0.7, 'Human')
    # shutil.move also works when the destination is on another filesystem.
    shutil.move('network.sif', str(cyto_file))
b |
diff -r 00fbbb20ffbe -r ecbbc746d5a7 ProteinInteractions_v2.xml --- a/ProteinInteractions_v2.xml Thu Nov 19 11:45:52 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
<tool id="SAINT_ProteinInteractions_v2" name="SAINT Output to Protein Interaction File">
    <description></description>
    <!-- Positional arguments: SAINT list file, SAINT score cutoff, CPDB
         confidence cutoff, species, output file, and the directory that holds
         the ConsensusPathDB dumps. The escaped \$INSTALL_RUN_PATH is expanded
         by the shell from the environment variable required below. -->
    <command interpreter="python">ProteinInteractions_v2.py '$input' $saint_cutoff $CPDB_cutoff $species '$Cytoscape_File' \$INSTALL_RUN_PATH/</command>
    <requirements>
        <requirement type="set_environment">INSTALL_RUN_PATH</requirement>
    </requirements>
    <inputs>
        <param format="txt" name="input" type="data" label="SAINT Output"/>
        <!-- Both cutoffs are compared against scores in the range 0 to 1. -->
        <param type="float" name="saint_cutoff" label="Saint Score Cutoff" value="0.8" min="0" max="1"/>
        <param type="float" name="CPDB_cutoff" label="Consensus Path Database Score Cutoff" value="0.8" min="0" max="1"/>
        <param type="select" name="species" label="Species">
            <option value="Human">Human</option>
            <option value="Yeast">Yeast</option>
            <option value="Mouse">Mouse</option>
        </param>
    </inputs>
    <outputs>
        <data format="sif" name="Cytoscape_File" label="Cytoscape File"/>
    </outputs>
    <stdio>
        <!-- The script reports failures through sys.exit(message), i.e. a
             non-zero exit status; without this rule only the stdout regex
             below would be consulted and such runs would look successful. -->
        <exit_code range="1:" level="fatal" description="Script exited with a non-zero status"/>
        <regex match="error"
               source="stdout"
               level="fatal"
               description="Unknown error"/>
    </stdio>

    <tests>
        <!-- NOTE(review): the data file names below look like template
             placeholders; replace them with a real SAINT list file and its
             expected network before relying on this test. The output name
             now matches the output declared above. -->
        <test>
            <param name="input" value="fa_gc_content_input.fa"/>
            <output name="Cytoscape_File" file="fa_gc_content_output.txt"/>
        </test>
    </tests>
    <help>
        This program will read in a SAINT 'list.txt' file and the interactions from the consensus path db database and return all the interactions that we saw in our experiment in a format suitable for cytoscape. This allows us to filter before getting PPIs so that it doesn't affect our SAINT score or include interactions that don't score well
    </help>
</tool>
b |
diff -r 00fbbb20ffbe -r ecbbc746d5a7 tool_dependencies.xml --- a/tool_dependencies.xml Thu Nov 19 11:45:52 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
<?xml version="1.0"?>
<!-- Exposes the repository install directory to the tool as INSTALL_RUN_PATH,
     which the tool's command line uses to locate its bundled data files. -->
<tool_dependency>
    <set_environment version="1.0">
        <environment_variable name="INSTALL_RUN_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
    </set_environment>
</tool_dependency>