comparison Protein_report_processing.py @ 58:4f843e0c6c40 draft

Uploaded
author bornea
date Sat, 27 Aug 2016 21:03:57 -0400
parents 18389ccc7629
children 792056ff8ed5
comparison
equal deleted inserted replaced
57:677d224656e0 58:4f843e0c6c40
89 data = open(fasta_db, 'r') 89 data = open(fasta_db, 'r')
90 data_lines = data.readlines() 90 data_lines = data.readlines()
91 db_len = len(data_lines) 91 db_len = len(data_lines)
92 seqlength = 0 92 seqlength = 0
93 count = 0 93 count = 0
94 last_line = data_lines[-1]
94 for data_line in data_lines: 95 for data_line in data_lines:
95 if ">sp" in data_line: 96 if ">sp" in data_line:
96 namer = data_line.split("|")[2] 97 namer = data_line.split("|")[2]
97 if uniprot_accession_in == data_line.split("|")[1]: 98 if uniprot_accession_in == data_line.split("|")[1]:
98 match = count + 1 99 match = count + 1
103 if 'GN=' not in data_line: 104 if 'GN=' not in data_line:
104 genename = 'NA' 105 genename = 'NA'
105 while ">sp" not in data_lines[match]: 106 while ">sp" not in data_lines[match]:
106 if match <= db_len: 107 if match <= db_len:
107 seqlength = seqlength + len(data_lines[match].strip()) 108 seqlength = seqlength + len(data_lines[match].strip())
109 if data_lines[match] == last_line:
110 break
108 match = match + 1 111 match = match + 1
109 else: 112 else:
110 break 113 break
111 return ReturnValue1(seqlength, genename) 114 return ReturnValue1(seqlength, genename)
112 if uniprot_accession_in == namer.split(" ")[0]: 115 if uniprot_accession_in == namer.split(" ")[0]:
119 if 'GN=' not in data_line: 122 if 'GN=' not in data_line:
120 genename = 'NA' 123 genename = 'NA'
121 while ">sp" not in data_lines[match]: 124 while ">sp" not in data_lines[match]:
122 if match <= db_len: 125 if match <= db_len:
123 seqlength = seqlength + len(data_lines[match].strip()) 126 seqlength = seqlength + len(data_lines[match].strip())
127 if data_lines[match] == last_line:
128 break
124 match = match + 1 129 match = match + 1
125 else: 130 else:
126 break 131 break
127 return ReturnValue1(seqlength, genename) 132 return ReturnValue1(seqlength, genename)
128 count = count + 1 133 count = count + 1