comparison SAINT_preprocessing.py @ 53:26b5bd96332e draft

Uploaded
author bornea
date Sat, 27 Aug 2016 20:25:09 -0400
parents 09b89febcf98
children
comparison
equal deleted inserted replaced
52:8031a47f67c6 53:26b5bd96332e
108 data = open(fasta_db, 'r') 108 data = open(fasta_db, 'r')
109 data_lines = data.readlines() 109 data_lines = data.readlines()
110 db_len = len(data_lines) 110 db_len = len(data_lines)
111 seqlength = 0 111 seqlength = 0
112 count = 0 112 count = 0
113 last_line = data_lines[-1]
113 for data_line in data_lines: 114 for data_line in data_lines:
114 if ">sp" in data_line: 115 if ">sp" in data_line:
115 namer = data_line.split("|")[2] 116 namer = data_line.split("|")[2]
116 if uniprot_accession_in == data_line.split("|")[1]: 117 if uniprot_accession_in == data_line.split("|")[1]:
117 match = count + 1 118 match = count + 1
122 if 'GN=' not in data_line: 123 if 'GN=' not in data_line:
123 genename = 'NA' 124 genename = 'NA'
124 while ">sp" not in data_lines[match]: 125 while ">sp" not in data_lines[match]:
125 if match <= db_len: 126 if match <= db_len:
126 seqlength = seqlength + len(data_lines[match].strip()) 127 seqlength = seqlength + len(data_lines[match].strip())
128 if data_lines[match] == last_line:
129 break
127 match = match + 1 130 match = match + 1
128 else: 131 else:
129 break 132 break
130 return ReturnValue1(seqlength, genename) 133 return ReturnValue1(seqlength, genename)
131 if uniprot_accession_in == namer.split(" ")[0]: 134 if uniprot_accession_in == namer.split(" ")[0]:
138 if 'GN=' not in data_line: 141 if 'GN=' not in data_line:
139 genename = 'NA' 142 genename = 'NA'
140 while ">sp" not in data_lines[match]: 143 while ">sp" not in data_lines[match]:
141 if match <= db_len: 144 if match <= db_len:
142 seqlength = seqlength + len(data_lines[match].strip()) 145 seqlength = seqlength + len(data_lines[match].strip())
146 if data_lines[match] == last_line:
147 break
143 match = match + 1 148 match = match + 1
144 else: 149 else:
145 break 150 break
146 return ReturnValue1(seqlength, genename) 151 return ReturnValue1(seqlength, genename)
147 count = count + 1 152 count = count + 1