Mercurial > repos > bornea > saint_interactions
comparison ProteinInteractions_v2.py @ 13:fd27366b9faf draft
Uploaded
author | bornea |
---|---|
date | Fri, 29 Jan 2016 12:52:27 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12:f8ef6b24862b | 13:fd27366b9faf |
---|---|
1 ################################################################################ | |
2 # This program will read in a SAINT 'list.txt' file and the interactions from | |
3 # the consensus path db database and return all the interactions that we saw in | |
4 # our experiment in a format suitable for cytoscape. This allows us to filter | |
5 # before getting PPIs so that it doesn't affect our SAINT score or include | |
6 # interactions that don't score well | |
7 ################################################################################ | |
8 # Copyright (C) Brent Kuenzi. | |
9 # Permission is granted to copy, distribute and/or modify this document | |
10 # under the terms of the GNU Free Documentation License, Version 1.3 | |
11 # or any later version published by the Free Software Foundation; | |
12 # with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. | |
13 # A copy of the license is included in the section entitled "GNU | |
14 # Free Documentation License". | |
15 ################################################################################ | |
16 ## REQUIRED INPUT ## | |
17 | |
18 # 1) listfile: SAINTexpress output | |
19 # 2) SAINT_cutoff: Saint score cutoff for import (between 0 and 1) | |
20 # 3) Int_conf: Confidence of PPI from CPDB to include | |
21 # - low: no filtering | |
22 # - medium: >0.5 | |
23 # - high: >0.7 | |
24 # - very high: >0.9 | |
25 # 4) Species: Human, Yeast, or Mouse | |
26 ################################################################################ | |
27 | |
28 | |
29 import urllib2 | |
30 import itertools | |
31 import sys | |
32 import os | |
33 | |
34 | |
35 listfile = sys.argv[1] | |
36 SAINT_cutoff = sys.argv[2] | |
37 Int_conf = sys.argv[3] | |
38 Species = sys.argv[4] | |
39 cyto_file = sys.argv[5] | |
40 db_path = sys.argv[6] | |
41 | |
42 | |
43 class ReturnValue1(object): | |
44 def __init__(self, uniprot_acc, gene, swissprot): | |
45 self.up = uniprot_acc | |
46 self.gn = gene | |
47 self.sp = swissprot | |
48 class ReturnValue2(object): | |
49 def __init__(self, getdata, getproteins, getheader): | |
50 self.data = getdata | |
51 self.proteins = getproteins | |
52 self.header = getheader | |
53 | |
54 | |
55 def main(listfile, SAINT_cutoff, Int_conf, Species): | |
56 cytoscape(dd_network(listfile, SAINT_cutoff, Int_conf), listfile, SAINT_cutoff) | |
57 | |
58 | |
59 def readtab(infile): | |
60 with open(infile, 'r') as file_to_read: | |
61 # Read in tab-delim text. | |
62 output = [] | |
63 for line in file_to_read: | |
64 line = line.strip() | |
65 temp = line.split('\t') | |
66 output.append(temp) | |
67 return output | |
68 | |
69 | |
70 def read_listfile(listfile): | |
71 # Get data, proteins and header from scaffold output | |
72 dupes = readtab(listfile) | |
73 header = dupes[0] | |
74 prot_start = header.index("PreyGene")-1 | |
75 data = dupes[1:] | |
76 # Cut off blank line and END OF FILE. | |
77 proteins = [] | |
78 for protein in data: | |
79 proteins.append(protein[prot_start]) | |
80 return ReturnValue2(data, proteins, header) | |
81 | |
82 | |
83 def get_info(uniprot_accession_in): | |
84 # Get aa lengths and gene name. | |
85 error = open('error proteins.txt', 'a+') | |
86 i = 0 | |
87 while i == 0: | |
88 try: | |
89 data = urllib2.urlopen("http://www.uniprot.org/uniprot/" + uniprot_accession_in | |
90 + ".fasta") | |
91 break | |
92 except urllib2.HTTPError, err: | |
93 i = i + 1 | |
94 if i == 50: | |
95 sys.exit("More than 50 errors. Check your file or try again later.") | |
96 if err.code == 404: | |
97 error.write(uniprot_accession_in + '\t' + "Invalid URL. Check protein" + '\n') | |
98 seqlength = 'NA' | |
99 genename = 'NA' | |
100 return ReturnValue1(seqlength, genename) | |
101 elif err.code == 302: | |
102 sys.exit("Request timed out. Check connection and try again.") | |
103 else: | |
104 sys.exit("Uniprot had some other error") | |
105 lines = data.readlines() | |
106 header = lines[0] | |
107 lst = header.split('|') | |
108 lst2 = lst[2].split(' ') | |
109 swissprot = lst2[0] | |
110 uniprot_acc = lst[1] | |
111 if lines == []: | |
112 error.write(uniprot_accession_in + '\t' + "Blank Fasta" + '\n') | |
113 error.close | |
114 uniprot_acc = 'NA' | |
115 genename = 'NA' | |
116 return ReturnValue1(uniprot_acc, genename, swissprot) | |
117 if lines != []: | |
118 seqlength = 0 | |
119 header = lines[0] | |
120 if 'GN=' in header: | |
121 lst = header.split('GN=') | |
122 lst2 = lst[1].split(' ') | |
123 genename = lst2[0] | |
124 error.close | |
125 return ReturnValue1(uniprot_acc, genename, swissprot) | |
126 if 'GN=' not in header: | |
127 genename = 'NA' | |
128 error.close | |
129 return ReturnValue1(uniprot_acc, genename, swissprot) | |
130 | |
131 | |
132 def dd_network(listfile, SAINTscore, CPDB_filter): | |
133 # Filter by SS and CPDB. | |
134 data = read_listfile(listfile).data | |
135 # Change to filtered list. | |
136 SS = (read_listfile(listfile).header).index("SaintScore") | |
137 filt_data = [] | |
138 for i in data: | |
139 if i[SS] >= SAINTscore: | |
140 filt_data.append(i) | |
141 accessions = [] | |
142 for i in filt_data: | |
143 accessions.append(get_info(i[1]).sp) | |
144 GO = [] | |
145 for i in CPDB[2:]: | |
146 if i[3] >= CPDB_filter: | |
147 # Filter interaction confidence. | |
148 GO.append(i[2]) | |
149 # All known interactions. | |
150 GO2 = [] | |
151 for i in GO: | |
152 GO2.append(i.split(',')) | |
153 # Make interactions list friendly. | |
154 unfiltered_network = {} | |
155 for i in accessions: | |
156 interactions = [] | |
157 for j in GO2: | |
158 if i in j: | |
159 # Find the interactions. | |
160 if j not in interactions: | |
161 # Dont add duplicate interactions. | |
162 interactions.append(j) | |
163 merged = list(itertools.chain(*interactions)) | |
164 # Flatten list of lists. | |
165 unfiltered_network[i] = merged | |
166 # Assign all possible interactions to protein in a dictionary. | |
167 dd_network = {} | |
168 # Data dependent network. | |
169 for i in unfiltered_network: | |
170 temp = [] | |
171 for j in unfiltered_network[i]: | |
172 if j in accessions: | |
173 if j not in temp: | |
174 if j != i: | |
175 temp.append(j) | |
176 dd_network[i] = temp | |
177 return dd_network | |
178 | |
179 | |
180 def cytoscape(dd_network, listfile, SAINTscore): | |
181 with open('network.sif', 'wt') as y: | |
182 data = read_listfile(listfile).data | |
183 SS = (read_listfile(listfile).header).index("SaintScore") | |
184 filt_data = [] | |
185 for i in data: | |
186 if i[SS] >= SAINTscore: | |
187 filt_data.append(i) | |
188 header = ["Prey", "Interactions"] | |
189 header = '\t'.join(header) | |
190 y.write(header + '\n') | |
191 for i in filt_data: | |
192 if dd_network[i[1]] != []: | |
193 lst = [] | |
194 for j in dd_network[i[1]]: | |
195 lst.append(j) | |
196 for j in lst: | |
197 y.write(i[1]+'\t'+'pp'+'\t' + j+'\n') | |
198 | |
199 | |
200 if Species == "Human": | |
201 CPDB = readtab(str(db_path) + 'ConsensusPathDB_human_PPI.txt') | |
202 if Species == "Yeast": | |
203 CPDB = readtab(str(db_path) + 'ConsensusPathDB_yeast_PPI.txt') | |
204 if Species == "Mouse": | |
205 CPDB = readtab(str(db_path) +'ConsensusPathDB_mouse_PPI.txt') | |
206 if __name__ == '__main__': | |
207 main(listfile, SAINT_cutoff, Int_conf, Species) | |
208 os.rename('network.sif', str(cyto_file)) |