Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 11:b688d0dae86b draft
Uploaded
author | bornea |
---|---|
date | Tue, 12 Apr 2016 12:54:38 -0400 |
parents | 1b0547d3c7bc |
children | 3e1b66d58f94 |
comparison
equal
deleted
inserted
replaced
10:1b0547d3c7bc | 11:b688d0dae86b |
---|---|
23 # 5) make_bait: String of bait names, assignment, and test or control boolean | 23 # 5) make_bait: String of bait names, assignment, and test or control boolean |
24 ####################################################################################### | 24 ####################################################################################### |
25 | 25 |
26 import sys | 26 import sys |
27 import os.path | 27 import os.path |
 | 28 import re |
28 | 29 |
29 | 30 |
30 infile = sys.argv[1] | 31 infile = sys.argv[1] |
31 #Scaffold "Samples Report" output. | 32 #Scaffold "Samples Report" output. |
32 prey = sys.argv[2] | 33 prey = sys.argv[2] |
195 # Cut off blank line and END OF FILE. | 196 # Cut off blank line and END OF FILE. |
196 proteins = [] | 197 proteins = [] |
197 for Scaffold_line in data: | 198 for Scaffold_line in data: |
198 Scaffold_line[4] = Scaffold_line[4].split()[0] | 199 Scaffold_line[4] = Scaffold_line[4].split()[0] |
199 # Removes the (+##) that sometimes is attached. | 200 # Removes the (+##) that sometimes is attached. |
200 for protein in data: | 201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") |
201 proteins.append(protein[prot_start]) | 202 for protein in data: |
 | 203 prot_id = uniprot_re.match(protein[prot_start]) |
 | 204 proteins.append(prot_id.group()) |
202 return ReturnValue2(data, proteins, header) | 205 return ReturnValue2(data, proteins, header) |
203 | 206 |
204 | 207 |
205 def make_inter(Scaffold_input): | 208 def make_inter(Scaffold_input): |
206 bait = readtab(baitfile) | 209 bait = readtab(baitfile) |