Mercurial > repos > bornea > saint_preprocessing
comparison Protein_report_processing.py @ 58:4f843e0c6c40 draft
Uploaded
author | bornea |
---|---|
date | Sat, 27 Aug 2016 21:03:57 -0400 |
parents | 18389ccc7629 |
children | 792056ff8ed5 |
comparison
equal
deleted
inserted
replaced
57:677d224656e0 | 58:4f843e0c6c40 |
---|---|
89 data = open(fasta_db, 'r') | 89 data = open(fasta_db, 'r') |
90 data_lines = data.readlines() | 90 data_lines = data.readlines() |
91 db_len = len(data_lines) | 91 db_len = len(data_lines) |
92 seqlength = 0 | 92 seqlength = 0 |
93 count = 0 | 93 count = 0 |
| | 94 last_line = data_lines[-1] |
94 for data_line in data_lines: | 95 for data_line in data_lines: |
95 if ">sp" in data_line: | 96 if ">sp" in data_line: |
96 namer = data_line.split("|")[2] | 97 namer = data_line.split("|")[2] |
97 if uniprot_accession_in == data_line.split("|")[1]: | 98 if uniprot_accession_in == data_line.split("|")[1]: |
98 match = count + 1 | 99 match = count + 1 |
103 if 'GN=' not in data_line: | 104 if 'GN=' not in data_line: |
104 genename = 'NA' | 105 genename = 'NA' |
105 while ">sp" not in data_lines[match]: | 106 while ">sp" not in data_lines[match]: |
106 if match <= db_len: | 107 if match <= db_len: |
107 seqlength = seqlength + len(data_lines[match].strip()) | 108 seqlength = seqlength + len(data_lines[match].strip()) |
| | 109 if data_lines[match] == last_line: |
| | 110 break |
108 match = match + 1 | 111 match = match + 1 |
109 else: | 112 else: |
110 break | 113 break |
111 return ReturnValue1(seqlength, genename) | 114 return ReturnValue1(seqlength, genename) |
112 if uniprot_accession_in == namer.split(" ")[0]: | 115 if uniprot_accession_in == namer.split(" ")[0]: |
119 if 'GN=' not in data_line: | 122 if 'GN=' not in data_line: |
120 genename = 'NA' | 123 genename = 'NA' |
121 while ">sp" not in data_lines[match]: | 124 while ">sp" not in data_lines[match]: |
122 if match <= db_len: | 125 if match <= db_len: |
123 seqlength = seqlength + len(data_lines[match].strip()) | 126 seqlength = seqlength + len(data_lines[match].strip()) |
| | 127 if data_lines[match] == last_line: |
| | 128 break |
124 match = match + 1 | 129 match = match + 1 |
125 else: | 130 else: |
126 break | 131 break |
127 return ReturnValue1(seqlength, genename) | 132 return ReturnValue1(seqlength, genename) |
128 count = count + 1 | 133 count = count + 1 |