Mercurial > repos > bornea > saint_preprocessing
changeset 11:b688d0dae86b draft
Uploaded
author | bornea |
---|---|
date | Tue, 12 Apr 2016 12:54:38 -0400 |
parents | 1b0547d3c7bc |
children | 3e1b66d58f94 |
files | SAINT_preprocessing.py |
diffstat | 1 files changed, 5 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/SAINT_preprocessing.py Tue Mar 29 14:46:04 2016 -0400
+++ b/SAINT_preprocessing.py Tue Apr 12 12:54:38 2016 -0400
@@ -25,6 +25,7 @@
 import sys
 import os.path
+import re
 infile = sys.argv[1]
@@ -197,8 +198,10 @@
     for Scaffold_line in data:
         Scaffold_line[4] = Scaffold_line[4].split()[0] # Removes the (+##) that sometimes is attached.
-    for protein in data:
-        proteins.append(protein[prot_start])
+    uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
+    for protein in data:
+        prot_id = uniprot_re.match(protein[prot_start])
+        proteins.append(prot_id.group())
     return ReturnValue2(data, proteins, header)
```