| 4 | 1 ################################################################################ | 
|  | 2 # This program will read in a SAINT 'list.txt' file and the interactions from | 
|  | 3 # the consensus path db database and return all the interactions that we saw in | 
|  | 4 # our experiment in a format suitable for cytoscape. This allows us to filter | 
|  | 5 # before getting PPIs so that it doesn't affect our SAINT score or include | 
|  | 6 # interactions that don't score well | 
|  | 7 ################################################################################ | 
|  | 8 import urllib2 | 
|  | 9 import itertools | 
|  | 10 import sys | 
|  | 11 ################################################################################ | 
|  | 12 ## REQUIRED INPUT ## | 
|  | 13 | 
|  | 14 # 1) listfile: SAINTexpress output | 
|  | 15 # 2) SAINT_cutoff: Saint score cutoff for import (between 0 and 1) | 
|  | 16 # 3) Int_conf: Confidence of PPI from CPDB to include | 
|  | 17 #       - low: no filtering | 
|  | 18 #       - medium: >0.5 | 
|  | 19 #       - high: >0.7 | 
|  | 20 #       - very high: >0.9 | 
|  | 21 # 4) Species: Human, Yeast, or Mouse | 
|  | 22 ############################################################################### | 
|  | 23 listfile = sys.argv[1] | 
|  | 24 SAINT_cutoff = sys.argv[2] | 
|  | 25 Int_conf = sys.argv[3] | 
|  | 26 Species = sys.argv[4] | 
|  | 27 cyto_file = sys.argv[5] | 
| 9 | 28 db_path = sys.argv[6] | 
| 4 | 29 ############################################################################### | 
|  | 30 class ReturnValue1(object): | 
|  | 31     def __init__(self, uniprot_acc, gene, swissprot): | 
|  | 32         self.up = uniprot_acc | 
|  | 33         self.gn = gene | 
|  | 34         self.sp = swissprot | 
|  | 35 class ReturnValue2(object): | 
|  | 36     def __init__(self, getdata, getproteins, getheader): | 
|  | 37         self.data = getdata | 
|  | 38         self.proteins = getproteins | 
|  | 39         self.header = getheader | 
|  | 40 def main(listfile, SAINT_cutoff, Int_conf, Species): | 
|  | 41     cytoscape(dd_network(listfile, SAINT_cutoff, Int_conf), listfile, SAINT_cutoff) | 
|  | 42 def readtab(infile): | 
|  | 43     with open(infile,'r') as x: # read in tab-delim text | 
|  | 44         output = [] | 
|  | 45         for line in x: | 
|  | 46             line = line.strip() | 
|  | 47             temp = line.split('\t') | 
|  | 48             output.append(temp) | 
|  | 49     return output | 
|  | 50 def read_listfile(listfile): # Get data, proteins and header from scaffold output | 
|  | 51     dupes = readtab(listfile) | 
|  | 52     header = dupes[0] | 
|  | 53     prot_start = header.index("PreyGene")-1 | 
|  | 54     data = dupes[1:] # cut off blank line and END OF FILE | 
|  | 55     proteins = [] | 
|  | 56     for protein in data: | 
|  | 57         proteins.append(protein[prot_start]) | 
|  | 58     return ReturnValue2(data, proteins, header) | 
|  | 59 | 
|  | 60 def get_info(uniprot_accession_in): #get aa lengths and gene name | 
|  | 61     error = open('error proteins.txt', 'a+') | 
|  | 62     i=0 | 
|  | 63     while i==0: | 
|  | 64         try: | 
|  | 65             data = urllib2.urlopen("http://www.uniprot.org/uniprot/" + uniprot_accession_in + ".fasta") | 
|  | 66             break | 
|  | 67         except urllib2.HTTPError, err: | 
|  | 68             i = i + 1 | 
|  | 69             if i == 50: | 
|  | 70                 sys.exit("More than 50 errors. Check your file or try again later.") | 
|  | 71             if err.code == 404: | 
|  | 72                 error.write(uniprot_accession_in + '\t' + "Invalid URL. Check protein" + '\n') | 
|  | 73                 seqlength = 'NA' | 
|  | 74                 genename = 'NA' | 
|  | 75                 return ReturnValue1(seqlength, genename) | 
|  | 76             elif err.code == 302: | 
|  | 77                 sys.exit("Request timed out. Check connection and try again.") | 
|  | 78             else: | 
|  | 79                 sys.exit("Uniprot had some other error") | 
|  | 80     lines = data.readlines() | 
|  | 81     header = lines[0] | 
|  | 82     lst = header.split('|') | 
|  | 83     lst2 = lst[2].split(' ') | 
|  | 84     swissprot = lst2[0] | 
|  | 85     uniprot_acc = lst[1] | 
|  | 86     if lines == []: | 
|  | 87         error.write(uniprot_accession_in + '\t' + "Blank Fasta" + '\n') | 
|  | 88         error.close | 
|  | 89         uniprot_acc = 'NA' | 
|  | 90         genename = 'NA' | 
|  | 91         return ReturnValue1(uniprot_acc, genename, swissprot) | 
|  | 92     if lines != []: | 
|  | 93         seqlength = 0 | 
|  | 94         header = lines[0] | 
|  | 95         if 'GN=' in header: | 
|  | 96             lst = header.split('GN=') | 
|  | 97             lst2 = lst[1].split(' ') | 
|  | 98             genename = lst2[0] | 
|  | 99             error.close | 
|  | 100             return ReturnValue1(uniprot_acc, genename, swissprot) | 
|  | 101         if 'GN=' not in header: | 
|  | 102             genename = 'NA' | 
|  | 103             error.close | 
|  | 104             return ReturnValue1(uniprot_acc, genename, swissprot) | 
|  | 105 | 
|  | 106 def dd_network(listfile, SAINTscore, CPDB_filter): ## Filter by SS and CPDB | 
|  | 107     data = read_listfile(listfile).data # change to filtered list | 
|  | 108     SS = (read_listfile(listfile).header).index("SaintScore") | 
|  | 109     filt_data = [] | 
|  | 110     for i in data: | 
|  | 111         if i[SS] >= SAINTscore: | 
|  | 112             filt_data.append(i) | 
|  | 113     accessions = [] | 
|  | 114     for i in filt_data: | 
|  | 115         accessions.append(get_info(i[1]).sp) | 
|  | 116     GO=[] | 
|  | 117     for i in CPDB[2:]: | 
|  | 118         if i[3] >= CPDB_filter: # filter interaction confidence | 
|  | 119             GO.append(i[2]) # all known interactions | 
|  | 120     GO2 = [] | 
|  | 121     for i in GO: | 
|  | 122         GO2.append(i.split(',')) # make interactions list friendly | 
|  | 123     unfiltered_network = {} | 
|  | 124     for i in accessions: | 
|  | 125         interactions = [] | 
|  | 126         for j in GO2: | 
|  | 127             if i in j: # find the interactions | 
|  | 128                 if j not in interactions:# dont add duplicate interactions | 
|  | 129                     interactions.append(j) | 
|  | 130         merged = list(itertools.chain(*interactions)) # flatten list of lists | 
|  | 131         unfiltered_network[i]=merged # assign all possible interactions to protein in a dictionary | 
|  | 132     dd_network = {} #data dependent network | 
|  | 133     for i in unfiltered_network: | 
|  | 134         temp = [] | 
|  | 135         for j in unfiltered_network[i]: | 
|  | 136             if j in accessions: | 
|  | 137                 if j not in temp: | 
|  | 138                     if j != i: | 
|  | 139                         temp.append(j) | 
|  | 140         dd_network[i]=temp | 
|  | 141     return dd_network | 
|  | 142 def cytoscape(dd_network, listfile, SAINTscore): | 
|  | 143     with open('network.sif','wt') as y: | 
|  | 144         data = read_listfile(listfile).data | 
|  | 145         SS = (read_listfile(listfile).header).index("SaintScore") | 
|  | 146         filt_data = [] | 
|  | 147         for i in data: | 
|  | 148             if i[SS] >= SAINTscore: | 
|  | 149                 filt_data.append(i) | 
|  | 150         header = ["Prey", "Interactions"] | 
|  | 151         header = '\t'.join(header) | 
|  | 152         y.write(header + '\n') | 
|  | 153         for i in filt_data: | 
|  | 154             if dd_network[i[1]] != []: | 
|  | 155                 lst = [] | 
|  | 156                 #x='\t'.join(i) | 
|  | 157                 for j in dd_network[i[1]]: | 
|  | 158                     lst.append(j) | 
|  | 159                 for j in lst: | 
|  | 160                     y.write(i[1]+'\t'+'pp'+'\t' + j+'\n') | 
|  | 161 if Species == "Human": | 
|  | 162         CPDB = readtab(str(db_path) + 'ConsensusPathDB_human_PPI.txt') | 
|  | 163 if Species == "Yeast": | 
|  | 164         CPDB = readtab(str(db_path) + 'ConsensusPathDB_yeast_PPI.txt') | 
|  | 165 if Species == "Mouse": | 
|  | 166         CPDB = readtab(str(db_path) +'ConsensusPathDB_mouse_PPI.txt') | 
|  | 167 if __name__ == '__main__': | 
|  | 168     main(listfile, SAINT_cutoff, Int_conf, Species) | 
|  | 169     #main("Crizo_list.txt", 0.7, 0.7, 'Human') | 
|  | 170     os.rename('network.sif', str(cyto_file)) |