Mercurial > repos > bornea > saint_preprocessing
comparison Protein_report_processing.py @ 58:4f843e0c6c40 draft
Uploaded
| author | bornea |
|---|---|
| date | Sat, 27 Aug 2016 21:03:57 -0400 |
| parents | 18389ccc7629 |
| children | 792056ff8ed5 |
comparison
equal
deleted
inserted
replaced
| 57:677d224656e0 | 58:4f843e0c6c40 |
|---|---|
| 89 data = open(fasta_db, 'r') | 89 data = open(fasta_db, 'r') |
| 90 data_lines = data.readlines() | 90 data_lines = data.readlines() |
| 91 db_len = len(data_lines) | 91 db_len = len(data_lines) |
| 92 seqlength = 0 | 92 seqlength = 0 |
| 93 count = 0 | 93 count = 0 |
| | 94 last_line = data_lines[-1] |
| 94 for data_line in data_lines: | 95 for data_line in data_lines: |
| 95 if ">sp" in data_line: | 96 if ">sp" in data_line: |
| 96 namer = data_line.split("|")[2] | 97 namer = data_line.split("|")[2] |
| 97 if uniprot_accession_in == data_line.split("|")[1]: | 98 if uniprot_accession_in == data_line.split("|")[1]: |
| 98 match = count + 1 | 99 match = count + 1 |
| 103 if 'GN=' not in data_line: | 104 if 'GN=' not in data_line: |
| 104 genename = 'NA' | 105 genename = 'NA' |
| 105 while ">sp" not in data_lines[match]: | 106 while ">sp" not in data_lines[match]: |
| 106 if match <= db_len: | 107 if match <= db_len: |
| 107 seqlength = seqlength + len(data_lines[match].strip()) | 108 seqlength = seqlength + len(data_lines[match].strip()) |
| | 109 if data_lines[match] == last_line: |
| | 110 break |
| 108 match = match + 1 | 111 match = match + 1 |
| 109 else: | 112 else: |
| 110 break | 113 break |
| 111 return ReturnValue1(seqlength, genename) | 114 return ReturnValue1(seqlength, genename) |
| 112 if uniprot_accession_in == namer.split(" ")[0]: | 115 if uniprot_accession_in == namer.split(" ")[0]: |
| 119 if 'GN=' not in data_line: | 122 if 'GN=' not in data_line: |
| 120 genename = 'NA' | 123 genename = 'NA' |
| 121 while ">sp" not in data_lines[match]: | 124 while ">sp" not in data_lines[match]: |
| 122 if match <= db_len: | 125 if match <= db_len: |
| 123 seqlength = seqlength + len(data_lines[match].strip()) | 126 seqlength = seqlength + len(data_lines[match].strip()) |
| | 127 if data_lines[match] == last_line: |
| | 128 break |
| 124 match = match + 1 | 129 match = match + 1 |
| 125 else: | 130 else: |
| 126 break | 131 break |
| 127 return ReturnValue1(seqlength, genename) | 132 return ReturnValue1(seqlength, genename) |
| 128 count = count + 1 | 133 count = count + 1 |
